prometheus-2.15.2+ds (source tree at commit ee1c251f55c27d3269a3ef73add8c6af8afd7d7d)

prometheus-2.15.2+ds/.circleci/config.yml

---
version: 2.1

orbs:
  prometheus: prometheus/prometheus@0.3.0
  go: circleci/go@0.2.0
  win: circleci/windows@2.3.0

executors:
  # Whenever the Go version is updated here, .promu.yml
  # should also be updated.
  golang:
    docker:
      - image: circleci/golang:1.13-node
  fuzzit:
    docker:
      - image: fuzzitdev/golang:1.12.7-buster

jobs:
  test:
    executor: golang
    steps:
      - prometheus/setup_environment
      - go/load-cache:
          key: v1
      - restore_cache:
          keys:
            - v1-npm-deps-{{ checksum "web/ui/react-app/yarn.lock" }}
            - v1-npm-deps-
      - run:
          command: make
          environment:
            # Run garbage collection more aggressively to avoid getting OOMed during the lint phase.
            GOGC: "20"
            # By default Go uses GOMAXPROCS but a Circle CI executor has many
            # cores (> 30) while the CPU and RAM resources are throttled. If we
            # don't limit this to the number of allocated cores, the job is
            # likely to get OOMed and killed.
            GOOPTS: "-p 2"
      - prometheus/check_proto
      - prometheus/store_artifact:
          file: prometheus
      - prometheus/store_artifact:
          file: promtool
      - go/save-cache:
          key: v1
      - save_cache:
          key: v1-npm-deps-{{ checksum "web/ui/react-app/yarn.lock" }}
          paths:
            - web/ui/react-app/node_modules

  test_windows:
    executor: win/default
    working_directory: /go/src/github.com/prometheus/prometheus
    steps:
      - checkout
      # TSDB is where the most risk is Windows wise, so only test there for now.
      - run: go test ./tsdb/...

  fuzzit_regression:
    executor: fuzzit
    working_directory: /go/src/github.com/prometheus/prometheus
    steps:
      - checkout
      - setup_remote_docker
      - run: ./fuzzit.sh local-regression

  fuzzit_fuzzing:
    executor: fuzzit
    working_directory: /go/src/github.com/prometheus/prometheus
    steps:
      - checkout
      - setup_remote_docker
      - run: ./fuzzit.sh fuzzing

  makefile_sync:
    executor: golang
    steps:
      - checkout
      - run: ./scripts/sync_makefiles.sh

workflows:
  version: 2
  prometheus:
    jobs:
      - test:
          filters:
            tags:
              only: /.*/
      - test_windows:
          filters:
            tags:
              only: /.*/
      - fuzzit_regression:
          filters:
            tags:
              only: /.*/
      - prometheus/build:
          name: build
          filters:
            tags:
              only: /.*/
      - prometheus/publish_master:
          context: org-context
          requires:
            - test
            - build
          filters:
            branches:
              only: master
          image: circleci/golang:1-node
      - prometheus/publish_release:
          context: org-context
          requires:
            - test
            - build
          filters:
            tags:
              only: /^v[0-9]+(\.[0-9]+){2}(-.+|[^-.]*)$/
            branches:
              ignore: /.*/
          image: circleci/golang:1-node
  nightly:
    triggers:
      - schedule:
          cron: "0 0 * * *"
          filters:
            branches:
              only:
                - master
    jobs:
      - makefile_sync:
          context: org-context
      - fuzzit_fuzzing:
          context: org-context

prometheus-2.15.2+ds/.dockerignore

data/
.build/
.tarballs/
!.build/linux-amd64/
!.build/linux-armv7/
!.build/linux-arm64/

prometheus-2.15.2+ds/.github/ISSUE_TEMPLATE.md

## Proposal

**Use case. Why is this important?**
*“Nice to have” is not a good use case. :)*

## Bug Report

**What did you do?**

**What did you expect to see?**

**What did you see instead? Under which circumstances?**

**Environment**

* System information: insert output of `uname -srm` here
* Prometheus version: insert output of `prometheus --version` here
* Alertmanager version: insert output of `alertmanager --version` here (if relevant to the issue)
* Prometheus configuration file:

```
insert configuration here
```

* Alertmanager configuration file:

```
insert configuration here (if relevant to the issue)
```

* Logs:

```
insert Prometheus and Alertmanager logs relevant to the issue here
```

prometheus-2.15.2+ds/.github/PULL_REQUEST_TEMPLATE.md

prometheus-2.15.2+ds/.github/lock.yml

# Configuration for Lock Threads - https://github.com/dessant/lock-threads

# Number of days of inactivity before a closed issue or pull request is locked
daysUntilLock: 180

# Skip issues and pull requests created before a given timestamp. Timestamp must
# follow ISO 8601 (`YYYY-MM-DD`). Set to `false` to disable
skipCreatedBefore: false

# Issues and pull requests with these labels will be ignored. Set to `[]` to disable
exemptLabels: []

# Label to add before locking, such as `outdated`. Set to `false` to disable
lockLabel: false

# Comment to post before locking. Set to `false` to disable
lockComment: false

# Assign `resolved` as the reason for locking. Set to `false` to disable
setLockReason: false

# Limit to only `issues` or `pulls`
only: issues

# Optionally, specify configuration settings just for `issues` or `pulls`
# issues:
#   exemptLabels:
#     - help-wanted
#   lockLabel: outdated
# pulls:
#   daysUntilLock: 30

# Repository to extend settings from
# _extends: repo

prometheus-2.15.2+ds/.gitignore

*#
.#*
/*.yaml
/*.yml
*.exe
/prometheus
/promtool
/tsdb/tsdb
benchmark.txt
/data
/cmd/prometheus/data
/cmd/prometheus/debug
!/.travis.yml
!/.promu.yml
!/.golangci.yml
/documentation/examples/remote_storage/remote_storage_adapter/remote_storage_adapter
/documentation/examples/remote_storage/example_write_adapter/example_writer_adapter
npm_licenses.tar.bz2
/web/ui/static/react
/web/ui/assets_vfsdata.go

prometheus-2.15.2+ds/.golangci.yml

run:
  modules-download-mode: vendor
  deadline: 5m

issues:
  exclude-rules:
    - path: _test.go
      linters:
        - errcheck

linters-settings:
  errcheck:
    exclude: scripts/errcheck_excludes.txt

prometheus-2.15.2+ds/.promu.yml

go:
  # Whenever the Go version is updated here,
  # .circleci/config.yml should also be updated.
  version: 1.13
repository:
  path: github.com/prometheus/prometheus
build:
  binaries:
    - name: prometheus
      path: ./cmd/prometheus
    - name: promtool
      path: ./cmd/promtool
    - name: tsdb
      path: ./tsdb/cmd/tsdb
  flags: -mod=vendor -a -tags netgo,builtinassets
  ldflags: |
    -X github.com/prometheus/common/version.Version={{.Version}}
    -X github.com/prometheus/common/version.Revision={{.Revision}}
    -X github.com/prometheus/common/version.Branch={{.Branch}}
    -X github.com/prometheus/common/version.BuildUser={{user}}@{{host}}
    -X github.com/prometheus/common/version.BuildDate={{date "20060102-15:04:05"}}
tarball:
  files:
    - consoles
    - console_libraries
    - documentation/examples/prometheus.yml
    - LICENSE
    - NOTICE
    - npm_licenses.tar.bz2
crossbuild:
  platforms:
    - linux/amd64
    - linux/386
    - darwin/amd64
    - darwin/386
    - windows/amd64
    - windows/386
    - freebsd/amd64
    - freebsd/386
    - openbsd/amd64
    - openbsd/386
    - netbsd/amd64
    - netbsd/386
    - dragonfly/amd64
    - linux/arm
    - linux/arm64
    - freebsd/arm
    - openbsd/arm
    - linux/mips64
    - linux/mips64le
    - netbsd/arm
    - linux/ppc64
    - linux/ppc64le
    - linux/s390x

prometheus-2.15.2+ds/CHANGELOG.md

## 2.15.2 / 2020-01-06

* [BUGFIX] TSDB: Fixed support for TSDB blocks built with Prometheus before 2.1.0. #6564
* [BUGFIX] TSDB: Fixed block compaction issues on Windows. #6547

## 2.15.1 / 2019-12-25

* [BUGFIX] TSDB: Fixed race on concurrent queries against same data. #6512

## 2.15.0 / 2019-12-23

* [CHANGE] Discovery: Removed `prometheus_sd_kubernetes_cache_*` metrics. Additionally `prometheus_sd_kubernetes_workqueue_latency_seconds` and `prometheus_sd_kubernetes_workqueue_work_duration_seconds` metrics now show correct values in seconds. #6393
* [CHANGE] Remote write: Changed `query` label on `prometheus_remote_storage_*` metrics to `remote_name` and `url`. #6043
* [FEATURE] API: Added new endpoint for exposing per metric metadata `/metadata`. #6420 #6442
* [ENHANCEMENT] TSDB: Significantly reduced memory footprint of loaded TSDB blocks. #6418 #6461
* [ENHANCEMENT] TSDB: Significantly optimized what we buffer during compaction which should result in lower memory footprint during compaction. #6422 #6452 #6468 #6475
* [ENHANCEMENT] TSDB: Improve replay latency. #6230
* [ENHANCEMENT] TSDB: WAL size is now used for size based retention calculation. #5886
* [ENHANCEMENT] Remote read: Added query grouping and range hints to the remote read request #6401
* [ENHANCEMENT] Remote write: Added `prometheus_remote_storage_sent_bytes_total` counter per queue. #6344
* [ENHANCEMENT] promql: Improved PromQL parser performance. #6356
* [ENHANCEMENT] React UI: Implemented missing pages like `/targets` #6276, TSDB status page #6267 and many other fixes and performance improvements.
* [ENHANCEMENT] promql: Prometheus now accepts spaces between time range and square bracket. e.g `[ 5m]` (see the sketch below). #6065
* [BUGFIX] Config: Fixed alertmanager configuration to not miss targets when configurations are similar. #6455
* [BUGFIX] Remote write: Value of `prometheus_remote_storage_shards_desired` gauge shows raw value of desired shards and it's updated correctly. #6378
* [BUGFIX] Rules: Prometheus now fails the evaluation of rules and alerts where metric results collide with labels specified in `labels` field. #6469
* [BUGFIX] API: Targets Metadata API `/targets/metadata` now accepts empty `match_targets` parameter as in the spec. #6303
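As an illustration of the relaxed range-selector syntax noted above (#6065), here is a minimal, hypothetical recording-rule file; the group name, rule name, and metric are invented for the example.

```yaml
# Hypothetical rules file: the space inside "[ 5m]" is accepted by the
# PromQL parser as of 2.15.0 and is equivalent to "[5m]".
groups:
  - name: example-recording-rules
    rules:
      - record: job:http_requests:rate5m
        expr: sum by (job) (rate(http_requests_total[ 5m]))
```

Loading such a file is unchanged: it is referenced under `rule_files` in the main configuration.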
## 2.14.0 / 2019-11-11

* [SECURITY/BUGFIX] UI: Ensure warnings from the API are escaped. #6279
* [FEATURE] API: `/api/v1/status/runtimeinfo` and `/api/v1/status/buildinfo` endpoints added for use by the React UI. #6243
* [FEATURE] React UI: implement the new experimental React based UI. #5694 and many more
  * Can be found under `/new`.
  * Not all pages are implemented yet.
* [FEATURE] Status: Cardinality statistics added to the Runtime & Build Information page. #6125
* [ENHANCEMENT/BUGFIX] Remote write: fix delays in remote write after a compaction. #6021
* [ENHANCEMENT] UI: Alerts can be filtered by state. #5758
* [BUGFIX] API: lifecycle endpoints return 403 when not enabled. #6057
* [BUGFIX] Build: Fix Solaris build. #6149
* [BUGFIX] Promtool: Remove false duplicate rule warnings when checking rule files with alerts. #6270
* [BUGFIX] Remote write: restore use of deduplicating logger in remote write. #6113
* [BUGFIX] Remote write: do not reshard when unable to send samples. #6111
* [BUGFIX] Service discovery: errors are no longer logged on context cancellation. #6116, #6133
* [BUGFIX] UI: handle null response from API properly. #6071

## 2.13.1 / 2019-10-16

* [BUGFIX] Fix panic in ARM builds of Prometheus. #6110
* [BUGFIX] promql: fix potential panic in the query logger. #6094
* [BUGFIX] Fix multiple `http: superfluous response.WriteHeader call` errors in the logs. #6145

## 2.13.0 / 2019-10-04

* [SECURITY/BUGFIX] UI: Fix a Stored DOM XSS vulnerability with query history [CVE-2019-10215](http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2019-10215). #6098
* [CHANGE] Metrics: renamed prometheus_sd_configs_failed_total to prometheus_sd_failed_configs and changed to Gauge #5254
* [ENHANCEMENT] Include the tsdb tool in builds. #6089
* [ENHANCEMENT] Service discovery: add new node address types for kubernetes. #5902
* [ENHANCEMENT] UI: show warnings if the query has returned warnings. #5964
* [ENHANCEMENT] Remote write: reduce memory usage of the series cache. #5849
* [ENHANCEMENT] Remote read: use remote read streaming to reduce memory usage. #5703
* [ENHANCEMENT] Metrics: added metrics for remote write max/min/desired shards to queue manager. #5787
* [ENHANCEMENT] Promtool: show the warnings during label query. #5924
* [ENHANCEMENT] Promtool: improve error messages when parsing bad rules. #5965
* [ENHANCEMENT] Promtool: more promlint rules. #5515
* [BUGFIX] Promtool: fix recording inconsistency due to duplicate labels. #6026
* [BUGFIX] UI: fix service-discovery view when accessed from unhealthy targets. #5915
* [BUGFIX] Metrics format: fix OpenMetrics parser crash on short input. #5939
* [BUGFIX] UI: avoid truncated Y-axis values. #6014

## 2.12.0 / 2019-08-17

* [FEATURE] Track currently active PromQL queries in a log file. #5794
* [FEATURE] Enable and provide binaries for `mips64` / `mips64le` architectures. #5792
* [ENHANCEMENT] Improve responsiveness of targets web UI and API endpoint. #5740
* [ENHANCEMENT] Improve remote write desired shards calculation. #5763
* [ENHANCEMENT] Flush TSDB pages more precisely. tsdb#660
* [ENHANCEMENT] Add `prometheus_tsdb_retention_limit_bytes` metric. tsdb#667
* [ENHANCEMENT] Add logging during TSDB WAL replay on startup. tsdb#662
* [ENHANCEMENT] Improve TSDB memory usage. tsdb#653, tsdb#643, tsdb#654, tsdb#642, tsdb#627
* [BUGFIX] Check for duplicate label names in remote read. #5829
* [BUGFIX] Mark deleted rules' series as stale on next evaluation. #5759
* [BUGFIX] Fix JavaScript error when showing warning about out-of-sync server time. #5833
* [BUGFIX] Fix `promtool test rules` panic when providing empty `exp_labels`. #5774
* [BUGFIX] Only check last directory when discovering checkpoint number. #5756
* [BUGFIX] Fix error propagation in WAL watcher helper functions. #5741
* [BUGFIX] Correctly handle empty labels from alert templates. #5845

## 2.11.1 / 2019-07-10

* [BUGFIX] Fix potential panic when prometheus is watching multiple zookeeper paths. #5749

## 2.11.0 / 2019-07-09

* [CHANGE] Remove `max_retries` from queue_config (it has been unused since rewriting remote-write to utilize the write-ahead-log). #5649
* [CHANGE] The meta file `BlockStats` no longer holds size information. This is now dynamically calculated and kept in memory. It also includes the meta file size which was not included before. tsdb#637
* [CHANGE] Renamed metric from `prometheus_tsdb_wal_reader_corruption_errors` to `prometheus_tsdb_wal_reader_corruption_errors_total`. tsdb#622
* [FEATURE] Add option to use Alertmanager API v2 (see the sketch below). #5482
* [FEATURE] Added `humanizePercentage` function for templates. #5670
* [FEATURE] Include InitContainers in Kubernetes Service Discovery. #5598
* [FEATURE] Provide option to compress WAL records using Snappy. [#609](https://github.com/prometheus/tsdb/pull/609)
* [ENHANCEMENT] Create new clean segment when starting the WAL. tsdb#608
* [ENHANCEMENT] Reduce allocations in PromQL aggregations. #5641
* [ENHANCEMENT] Add storage warnings to LabelValues and LabelNames API results. #5673
* [ENHANCEMENT] Add `prometheus_http_requests_total` metric. #5640
* [ENHANCEMENT] Enable openbsd/arm build. #5696
* [ENHANCEMENT] Remote-write allocation improvements. #5614
* [ENHANCEMENT] Query performance improvement: Efficient iteration and search in HashForLabels and HashWithoutLabels. #5707
* [ENHANCEMENT] Allow injection of arbitrary headers in promtool. #4389
* [ENHANCEMENT] Allow passing `external_labels` in alert unit test groups. #5608
* [ENHANCEMENT] Allow globs for rules when unit testing. #5595
* [ENHANCEMENT] Improved postings intersection matching. tsdb#616
* [ENHANCEMENT] Reduced disk usage for WAL for small setups. tsdb#605
* [ENHANCEMENT] Optimize queries using regexp for set lookups. tsdb#602
* [BUGFIX] Resolve race condition in maxGauge. #5647
* [BUGFIX] Fix ZooKeeper connection leak. #5675
* [BUGFIX] Improved atomicity of .tmp block replacement during compaction for usual case. tsdb#636
* [BUGFIX] Fix "unknown series references" after clean shutdown. tsdb#623
* [BUGFIX] Re-calculate block size when calling `block.Delete`. tsdb#637
* [BUGFIX] Fix unsafe snapshots with head block. tsdb#641
* [BUGFIX] `prometheus_tsdb_compactions_failed_total` is now incremented on any compaction failure. tsdb#613
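To make the Alertmanager API v2 option (#5482) concrete, below is a minimal, hypothetical `alerting` section of a Prometheus configuration; the target address is invented, and the `api_version` field falls back to `v1` when omitted.

```yaml
# Hypothetical alerting configuration: opt in to the Alertmanager v2 API.
alerting:
  alertmanagers:
    - api_version: v2          # v1 is used if this is omitted
      static_configs:
        - targets:
            - "alertmanager.example.org:9093"   # made-up address
```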
## 2.10.0 / 2019-05-25

* [CHANGE/BUGFIX] API: Encode alert values as string to correctly represent Inf/NaN. #5582
* [FEATURE] Template expansion: Make external labels available as `$externalLabels` in alert and console template expansion. #5463
* [FEATURE] TSDB: Add `prometheus_tsdb_wal_segment_current` metric for the WAL segment index that TSDB is currently writing to. tsdb#601
* [FEATURE] Scrape: Add `scrape_series_added` per-scrape metric. #5546
* [ENHANCEMENT] Discovery/kubernetes: Add labels `__meta_kubernetes_endpoint_node_name` and `__meta_kubernetes_endpoint_hostname`. #5571
* [ENHANCEMENT] Discovery/azure: Add label `__meta_azure_machine_public_ip`. #5475
* [ENHANCEMENT] TSDB: Simplify mergedPostings.Seek, resulting in better performance if there are many posting lists. tsdb#595
* [ENHANCEMENT] Log filesystem type on startup. #5558
* [ENHANCEMENT] Cmd/promtool: Use POST requests for Query and QueryRange. client_golang#557
* [ENHANCEMENT] Web: Sort alerts by group name. #5448
* [ENHANCEMENT] Console templates: Add convenience variables `$rawParams`, `$params`, `$path`. #5463
* [BUGFIX] TSDB: Don't panic when running out of disk space and recover nicely from the condition. tsdb#582
* [BUGFIX] TSDB: Correctly handle empty labels. tsdb#594
* [BUGFIX] TSDB: Don't crash on an unknown tombstone reference. tsdb#604
* [BUGFIX] Storage/remote: Remove queue-manager specific metrics if queue no longer exists. #5445 #5485 #5555
* [BUGFIX] PromQL: Correctly display `{__name__="a"}`. #5552
* [BUGFIX] Discovery/kubernetes: Use `service` rather than `ingress` as the name for the service workqueue. #5520
* [BUGFIX] Discovery/azure: Don't panic on a VM with a public IP. #5587
* [BUGFIX] Discovery/triton: Always read HTTP body to completion. #5596
* [BUGFIX] Web: Fixed Content-Type for js and css instead of using `/etc/mime.types`. #5551

## 2.9.2 / 2019-04-24

* [BUGFIX] Make sure subquery range is taken into account for selection #5467
* [BUGFIX] Exhaust every request body before closing it #5166
* [BUGFIX] Cmd/promtool: return errors from rule evaluations #5483
* [BUGFIX] Remote Storage: string interner should not panic in release #5487
* [BUGFIX] Fix memory allocation regression in mergedPostings.Seek tsdb#586

## 2.9.1 / 2019-04-16

* [BUGFIX] Discovery/kubernetes: fix missing label sanitization #5462
* [BUGFIX] Remote_write: Prevent reshard concurrent with calling stop #5460

## 2.9.0 / 2019-04-15

This release uses Go 1.12, which includes a change in how memory is released to Linux. This will cause RSS to be reported as higher; however, this is harmless and the memory is available to the kernel when it needs it.

* [CHANGE/ENHANCEMENT] Update Consul to support catalog.ServiceMultipleTags. #5151
* [FEATURE] Add honor_timestamps scrape option (see the sketch below). #5304
* [ENHANCEMENT] Discovery/kubernetes: add present labels for labels/annotations. #5443
* [ENHANCEMENT] OpenStack SD: Add ProjectID and UserID meta labels. #5431
* [ENHANCEMENT] Add GODEBUG and retention to the runtime page. #5324 #5322
* [ENHANCEMENT] Add support for POSTing to /series endpoint. #5422
* [ENHANCEMENT] Support PUT methods for Lifecycle and Admin APIs. #5376
* [ENHANCEMENT] Scrape: Add global jitter for HA server. #5181
* [ENHANCEMENT] Check for cancellation on every step of a range evaluation. #5131
* [ENHANCEMENT] String interning for labels & values in the remote_write path. #5316
* [ENHANCEMENT] Don't lose the scrape cache on a failed scrape. #5414
* [ENHANCEMENT] Reload cert files from disk automatically. common#173
* [ENHANCEMENT] Use fixed length millisecond timestamp format for logs. common#172
* [ENHANCEMENT] Performance improvements for postings. tsdb#509 tsdb#572
* [BUGFIX] Remote Write: fix checkpoint reading. #5429
* [BUGFIX] Check if label value is valid when unmarshaling external labels from YAML. #5316
* [BUGFIX] Promparse: sort all labels when parsing. #5372
* [BUGFIX] Reload rules: copy state on both name and labels. #5368
* [BUGFIX] Exponentiation operator to drop metric name in result of operation. #5329
* [BUGFIX] Config: resolve more file paths. #5284
* [BUGFIX] Promtool: resolve relative paths in alert test files. #5336
* [BUGFIX] Set TLSHandshakeTimeout in HTTP transport. common#179
* [BUGFIX] Use fsync to be more resilient to machine crashes. tsdb#573 tsdb#578
* [BUGFIX] Keep series that are still in WAL in checkpoints. tsdb#577
* [BUGFIX] Fix output sample values for scalar-to-vector comparison operations. #5454
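As a sketch of the `honor_timestamps` option added in 2.9.0 (#5304), here is a minimal, hypothetical scrape configuration; the job name and target are invented. Setting the option to `false` makes Prometheus stamp samples with its own scrape time instead of the timestamps exposed by the target.

```yaml
# Hypothetical scrape_config: ignore timestamps exposed by this target
# and use the scrape time instead.
scrape_configs:
  - job_name: example              # made-up job name
    honor_timestamps: false        # defaults to true
    static_configs:
      - targets:
          - "target.example.org:9100"   # made-up address
```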
## 2.8.1 / 2019-03-28

* [BUGFIX] Display the job labels in `/targets`, which were removed accidentally. #5406

## 2.8.0 / 2019-03-12

This release uses Write-Ahead Logging (WAL) for the remote_write API. This currently causes a slight increase in memory usage, which will be addressed in future releases.

* [CHANGE] Default time retention is used only when no size based retention is specified. Time retention is specified by the flag `--storage.tsdb.retention` and size retention by `--storage.tsdb.retention.size`. #5216
* [CHANGE] `prometheus_tsdb_storage_blocks_bytes_total` is now `prometheus_tsdb_storage_blocks_bytes`. prometheus/tsdb#506
* [FEATURE] [EXPERIMENTAL] Time overlapping blocks are now allowed; vertical compaction and vertical query merge. It is an optional feature which is controlled by the `--storage.tsdb.allow-overlapping-blocks` flag, disabled by default. prometheus/tsdb#370
* [ENHANCEMENT] Use the WAL for remote_write API. #4588
* [ENHANCEMENT] Query performance improvements. prometheus/tsdb#531
* [ENHANCEMENT] UI enhancements with upgrade to Bootstrap 4. #5226
* [ENHANCEMENT] Reduce time that Alertmanagers are in flux when reloaded. #5126
* [ENHANCEMENT] Limit number of metrics displayed on UI to 10000. #5139
* [ENHANCEMENT] (1) Remember All/Unhealthy choice on target-overview when reloading page. (2) Resize text-input area on Graph page on mouseclick. #5201
* [ENHANCEMENT] In `histogram_quantile` merge buckets with equivalent le values. #5158
* [ENHANCEMENT] Show list of offending labels in the error message in many-to-many scenarios. #5189
* [ENHANCEMENT] Show `Storage Retention` criteria in effect on `/status` page. #5322
* [BUGFIX] Fix sorting of rule groups. #5260
* [BUGFIX] Fix support for password_file and bearer_token_file in Kubernetes SD. #5211
* [BUGFIX] Scrape: catch errors when creating HTTP clients #5182. Adds new metrics:
  * `prometheus_target_scrape_pools_total`
  * `prometheus_target_scrape_pools_failed_total`
  * `prometheus_target_scrape_pool_reloads_total`
  * `prometheus_target_scrape_pool_reloads_failed_total`
* [BUGFIX] Fix panic when aggregator param is not a literal. #5290

## 2.7.2 / 2019-03-02

* [BUGFIX] `prometheus_rule_group_last_evaluation_timestamp_seconds` is now a unix timestamp. #5186

## 2.7.1 / 2019-01-31

This release has a fix for a Stored DOM XSS vulnerability that can be triggered when using the query history functionality. Thanks to Dor Tumarkin from Checkmarx for reporting it.

* [BUGFIX/SECURITY] Fix a Stored DOM XSS vulnerability with query history. #5163
* [BUGFIX] `prometheus_rule_group_last_duration_seconds` now reports seconds instead of nanoseconds. #5153
* [BUGFIX] Make sure the targets are consistently sorted in the targets page. #5161

## 2.7.0 / 2019-01-28

We're rolling back the Dockerfile changes introduced in 2.6.0. If you made changes to your docker deployment in 2.6.0, you will need to roll them back. This release also adds experimental support for disk size based retention. To accommodate that we are deprecating the flag `storage.tsdb.retention` in favour of `storage.tsdb.retention.time`. We print a warning if the flag is in use, but it will function without breaking until Prometheus 3.0.

* [CHANGE] Rollback Dockerfile to version at 2.5.0. Rollback of the breaking change introduced in 2.6.0. #5122
* [FEATURE] Add subqueries to PromQL. #4831
* [FEATURE] [EXPERIMENTAL] Add support for disk size based retention.
Note that we don't consider the WAL size which could be significant and the time based retention policy also applies. #5109 prometheus/tsdb#343 * [FEATURE] Add CORS origin flag. #5011 * [ENHANCEMENT] Consul SD: Add tagged address to the discovery metadata. #5001 * [ENHANCEMENT] Kubernetes SD: Add service external IP and external name to the discovery metadata. #4940 * [ENHANCEMENT] Azure SD: Add support for Managed Identity authentication. #4590 * [ENHANCEMENT] Azure SD: Add tenant and subscription IDs to the discovery metadata. #4969 * [ENHANCEMENT] OpenStack SD: Add support for application credentials based authentication. #4968 * [ENHANCEMENT] Add metric for number of rule groups loaded. #5090 * [BUGFIX] Avoid duplicate tests for alert unit tests. #4964 * [BUGFIX] Don't depend on given order when comparing samples in alert unit testing. #5049 * [BUGFIX] Make sure the retention period doesn't overflow. #5112 * [BUGFIX] Make sure the blocks don't get very large. #5112 * [BUGFIX] Don't generate blocks with no samples. prometheus/tsdb#374 * [BUGFIX] Reintroduce metric for WAL corruptions. prometheus/tsdb#473 ## 2.6.1 / 2019-01-15 * [BUGFIX] Azure SD: Fix discovery getting stuck sometimes. #5088 * [BUGFIX] Marathon SD: Use `Tasks.Ports` when `RequirePorts` is `false`. #5026 * [BUGFIX] Promtool: Fix "out-of-order sample" errors when testing rules. #5069 ## 2.6.0 / 2018-12-17 * [CHANGE] Remove default flags from the container's entrypoint, run Prometheus from `/etc/prometheus` and symlink the storage directory to `/etc/prometheus/data`. #4976 * [CHANGE] Promtool: Remove the `update` command. #3839 * [FEATURE] Add JSON log format via the `--log.format` flag. #4876 * [FEATURE] API: Add /api/v1/labels endpoint to get all label names. #4835 * [FEATURE] Web: Allow setting the page's title via the `--web.ui-title` flag. #4841 * [ENHANCEMENT] Add `prometheus_tsdb_lowest_timestamp_seconds`, `prometheus_tsdb_head_min_time_seconds` and `prometheus_tsdb_head_max_time_seconds` metrics. #4888 * [ENHANCEMENT] Add `rule_group_last_evaluation_timestamp_seconds` metric. #4852 * [ENHANCEMENT] Add `prometheus_template_text_expansion_failures_total` and `prometheus_template_text_expansions_total` metrics. #4747 * [ENHANCEMENT] Set consistent User-Agent header in outgoing requests. #4891 * [ENHANCEMENT] Azure SD: Error out at load time when authentication parameters are missing. #4907 * [ENHANCEMENT] EC2 SD: Add the machine's private DNS name to the discovery metadata. #4693 * [ENHANCEMENT] EC2 SD: Add the operating system's platform to the discovery metadata. #4663 * [ENHANCEMENT] Kubernetes SD: Add the pod's phase to the discovery metadata. #4824 * [ENHANCEMENT] Kubernetes SD: Log Kubernetes messages. #4931 * [ENHANCEMENT] Promtool: Collect CPU and trace profiles. #4897 * [ENHANCEMENT] Promtool: Support writing output as JSON. #4848 * [ENHANCEMENT] Remote Read: Return available data if remote read fails partially. #4832 * [ENHANCEMENT] Remote Write: Improve queue performance. #4772 * [ENHANCEMENT] Remote Write: Add min_shards parameter to set the minimum number of shards. #4924 * [ENHANCEMENT] TSDB: Improve WAL reading. #4953 * [ENHANCEMENT] TSDB: Memory improvements. #4953 * [ENHANCEMENT] Web: Log stack traces on panic. #4221 * [ENHANCEMENT] Web UI: Add copy to clipboard button for configuration. #4410 * [ENHANCEMENT] Web UI: Support console queries at specific times. #4764 * [ENHANCEMENT] Web UI: group targets by job then instance. #4898 #4806 * [BUGFIX] Deduplicate handler labels for HTTP metrics. 
#4732 * [BUGFIX] Fix leaked queriers causing shutdowns to hang. #4922 * [BUGFIX] Fix configuration loading panics on nil pointer slice elements. #4942 * [BUGFIX] API: Correctly skip mismatching targets on /api/v1/targets/metadata. #4905 * [BUGFIX] API: Better rounding for incoming query timestamps. #4941 * [BUGFIX] Azure SD: Fix panic. #4867 * [BUGFIX] Console templates: Fix hover when the metric has a null value. #4906 * [BUGFIX] Discovery: Remove all targets when the scrape configuration gets empty. #4819 * [BUGFIX] Marathon SD: Fix leaked connections. #4915 * [BUGFIX] Marathon SD: Use 'hostPort' member of portMapping to construct target endpoints. #4887 * [BUGFIX] PromQL: Fix a goroutine leak in the lexer/parser. #4858 * [BUGFIX] Scrape: Pass through content-type for non-compressed output. #4912 * [BUGFIX] Scrape: Fix deadlock in the scrape's manager. #4894 * [BUGFIX] Scrape: Scrape targets at fixed intervals even after Prometheus restarts. #4926 * [BUGFIX] TSDB: Support restored snapshots including the head properly. #4953 * [BUGFIX] TSDB: Repair WAL when the last record in a segment is torn. #4953 * [BUGFIX] TSDB: Fix unclosed file readers on Windows systems. #4997 * [BUGFIX] Web: Avoid proxy to connect to the local gRPC server. #4572 ## 2.5.0 / 2018-11-06 * [CHANGE] Group targets by scrape config instead of job name. #4806 #4526 * [CHANGE] Marathon SD: Various changes to adapt to Marathon 1.5+. #4499 * [CHANGE] Discovery: Split `prometheus_sd_discovered_targets` metric by scrape and notify (Alertmanager SD) as well as by section in the respective configuration. #4753 * [FEATURE] Add OpenMetrics support for scraping (EXPERIMENTAL). #4700 * [FEATURE] Add unit testing for rules. #4350 * [FEATURE] Make maximum number of samples per query configurable via `--query.max-samples` flag. #4513 * [FEATURE] Make maximum number of concurrent remote reads configurable via `--storage.remote.read-concurrent-limit` flag. #4656 * [ENHANCEMENT] Support s390x platform for Linux. #4605 * [ENHANCEMENT] API: Add `prometheus_api_remote_read_queries` metric tracking currently executed or waiting remote read API requests. #4699 * [ENHANCEMENT] Remote Read: Add `prometheus_remote_storage_remote_read_queries` metric tracking currently in-flight remote read queries. #4677 * [ENHANCEMENT] Remote Read: Reduced memory usage. #4655 * [ENHANCEMENT] Discovery: Add `prometheus_sd_discovered_targets`, `prometheus_sd_received_updates_total`, `prometheus_sd_updates_delayed_total`, and `prometheus_sd_updates_total` metrics for discovery subsystem. #4667 * [ENHANCEMENT] Discovery: Improve performance of previously slow updates of changes of targets. #4526 * [ENHANCEMENT] Kubernetes SD: Add extended metrics. #4458 * [ENHANCEMENT] OpenStack SD: Support discovering instances from all projects. #4682 * [ENHANCEMENT] OpenStack SD: Discover all interfaces. #4649 * [ENHANCEMENT] OpenStack SD: Support `tls_config` for the used HTTP client. #4654 * [ENHANCEMENT] Triton SD: Add ability to filter triton_sd targets by pre-defined groups. #4701 * [ENHANCEMENT] Web UI: Avoid browser spell-checking in expression field. #4728 * [ENHANCEMENT] Web UI: Add scrape duration and last evaluation time in targets and rules pages. #4722 * [ENHANCEMENT] Web UI: Improve rule view by wrapping lines. #4702 * [ENHANCEMENT] Rules: Error out at load time for invalid templates, rather than at evaluation time. #4537 * [ENHANCEMENT] TSDB: Add metrics for WAL operations. #4692 * [BUGFIX] Change max/min over_time to handle NaNs properly. 
#4386 * [BUGFIX] Check label name for `count_values` PromQL function. #4585 * [BUGFIX] Ensure that vectors and matrices do not contain identical label-sets. #4589 ## 2.4.3 / 2018-10-04 * [BUGFIX] Fix panic when using custom EC2 API for SD #4672 * [BUGFIX] Fix panic when Zookeeper SD cannot connect to servers #4669 * [BUGFIX] Make the skip_head an optional parameter for snapshot API #4674 ## 2.4.2 / 2018-09-21 The last release didn't have bugfix included due to a vendoring error. * [BUGFIX] Handle WAL corruptions properly prometheus/tsdb#389 * [BUGFIX] Handle WAL migrations correctly on Windows prometheus/tsdb#392 ## 2.4.1 / 2018-09-19 * [ENHANCEMENT] New TSDB metrics prometheus/tsdb#375 prometheus/tsdb#363 * [BUGFIX] Render UI correctly for Windows #4616 ## 2.4.0 / 2018-09-11 This release includes multiple bugfixes and features. Further, the WAL implementation has been re-written so the storage is not forward compatible. Prometheus 2.3 storage will work on 2.4 but not vice-versa. * [CHANGE] Reduce remote write default retries #4279 * [CHANGE] Remove /heap endpoint #4460 * [FEATURE] Persist alert 'for' state across restarts #4061 * [FEATURE] Add API providing per target metric metadata #4183 * [FEATURE] Add API providing recording and alerting rules #4318 #4501 * [ENHANCEMENT] Brand new WAL implementation for TSDB. Forwards incompatible with previous WAL. * [ENHANCEMENT] Show rule evaluation errors in UI #4457 * [ENHANCEMENT] Throttle resends of alerts to Alertmanager #4538 * [ENHANCEMENT] Send EndsAt along with the alert to Alertmanager #4550 * [ENHANCEMENT] Limit the samples returned by remote read endpoint #4532 * [ENHANCEMENT] Limit the data read in through remote read #4239 * [ENHANCEMENT] Coalesce identical SD configurations #3912 * [ENHANCEMENT] `promtool`: Add new commands for debugging and querying #4247 #4308 #4346 #4454 * [ENHANCEMENT] Update console examples for node_exporter v0.16.0 #4208 * [ENHANCEMENT] Optimize PromQL aggregations #4248 * [ENHANCEMENT] Remote read: Add Offset to hints #4226 * [ENHANCEMENT] `consul_sd`: Add support for ServiceMeta field #4280 * [ENHANCEMENT] `ec2_sd`: Maintain order of subnet_id label #4405 * [ENHANCEMENT] `ec2_sd`: Add support for custom endpoint to support EC2 compliant APIs #4333 * [ENHANCEMENT] `ec2_sd`: Add instance_owner label #4514 * [ENHANCEMENT] `azure_sd`: Add support for VMSS discovery and multiple environments #4202 #4569 * [ENHANCEMENT] `gce_sd`: Add instance_id label #4488 * [ENHANCEMENT] Forbid rule-abiding robots from indexing #4266 * [ENHANCEMENT] Log virtual memory limits on startup #4418 * [BUGFIX] Wait for service discovery to stop before exiting #4508 * [BUGFIX] Render SD configs properly #4338 * [BUGFIX] Only add LookbackDelta to vector selectors #4399 * [BUGFIX] `ec2_sd`: Handle panic-ing nil pointer #4469 * [BUGFIX] `consul_sd`: Stop leaking connections #4443 * [BUGFIX] Use templated labels also to identify alerts #4500 * [BUGFIX] Reduce floating point errors in stddev and related functions #4533 * [BUGFIX] Log errors while encoding responses #4359 ## 2.3.2 / 2018-07-12 * [BUGFIX] Fix various tsdb bugs #4369 * [BUGFIX] Reorder startup and shutdown to prevent panics. 
#4321 * [BUGFIX] Exit with non-zero code on error #4296 * [BUGFIX] discovery/kubernetes/ingress: fix scheme discovery #4329 * [BUGFIX] Fix race in zookeeper sd #4355 * [BUGFIX] Better timeout handling in promql #4291 #4300 * [BUGFIX] Propagate errors when selecting series from the tsdb #4136 ## 2.3.1 / 2018-06-19 * [BUGFIX] Avoid infinite loop on duplicate NaN values. #4275 * [BUGFIX] Fix nil pointer deference when using various API endpoints #4282 * [BUGFIX] config: set target group source index during unmarshaling #4245 * [BUGFIX] discovery/file: fix logging #4178 * [BUGFIX] kubernetes_sd: fix namespace filtering #4285 * [BUGFIX] web: restore old path prefix behavior #4273 * [BUGFIX] web: remove security headers added in 2.3.0 #4259 ## 2.3.0 / 2018-06-05 * [CHANGE] `marathon_sd`: use `auth_token` and `auth_token_file` for token-based authentication instead of `bearer_token` and `bearer_token_file` respectively. * [CHANGE] Metric names for HTTP server metrics changed * [FEATURE] Add query commands to promtool * [FEATURE] Add security headers to HTTP server responses * [FEATURE] Pass query hints via remote read API * [FEATURE] Basic auth passwords can now be configured via file across all configuration * [ENHANCEMENT] Optimize PromQL and API serialization for memory usage and allocations * [ENHANCEMENT] Limit number of dropped targets in web UI * [ENHANCEMENT] Consul and EC2 service discovery allow using server-side filtering for performance improvement * [ENHANCEMENT] Add advanced filtering configuration to EC2 service discovery * [ENHANCEMENT] `marathon_sd`: adds support for basic and bearer authentication, plus all other common HTTP client options (TLS config, proxy URL, etc.) * [ENHANCEMENT] Provide machine type metadata and labels in GCE service discovery * [ENHANCEMENT] Add pod controller kind and name to Kubernetes service discovery data * [ENHANCEMENT] Move TSDB to flock-based log file that works with Docker containers * [BUGFIX] Properly propagate storage errors in PromQL * [BUGFIX] Fix path prefix for web pages * [BUGFIX] Fix goroutine leak in Consul service discovery * [BUGFIX] Fix races in scrape manager * [BUGFIX] Fix OOM for very large k in PromQL topk() queries * [BUGFIX] Make remote write more resilient to unavailable receivers * [BUGFIX] Make remote write shutdown cleanly * [BUGFIX] Don't leak files on errors in TSDB's tombstone cleanup * [BUGFIX] Unary minus expressions now removes the metric name from results * [BUGFIX] Fix bug that lead to wrong amount of samples considered for time range expressions ## 2.2.1 / 2018-03-13 * [BUGFIX] Fix data loss in TSDB on compaction * [BUGFIX] Correctly stop timer in remote-write path * [BUGFIX] Fix deadlock triggered by loading targets page * [BUGFIX] Fix incorrect buffering of samples on range selection queries * [BUGFIX] Handle large index files on windows properly ## 2.2.0 / 2018-03-08 * [CHANGE] Rename file SD mtime metric. * [CHANGE] Send target update on empty pod IP in Kubernetes SD. * [FEATURE] Add API endpoint for flags. * [FEATURE] Add API endpoint for dropped targets. * [FEATURE] Display annotations on alerts page. * [FEATURE] Add option to skip head data when taking snapshots. * [ENHANCEMENT] Federation performance improvement. * [ENHANCEMENT] Read bearer token file on every scrape. * [ENHANCEMENT] Improve typeahead on `/graph` page. * [ENHANCEMENT] Change rule file formatting. * [ENHANCEMENT] Set consul server default to `localhost:8500`. * [ENHANCEMENT] Add dropped Alertmanagers to API info endpoint. 
* [ENHANCEMENT] Add OS type meta label to Azure SD. * [ENHANCEMENT] Validate required fields in SD configuration. * [BUGFIX] Prevent stack overflow on deep recursion in TSDB. * [BUGFIX] Correctly read offsets in index files that are greater than 4GB. * [BUGFIX] Fix scraping behavior for empty labels. * [BUGFIX] Drop metric name for bool modifier. * [BUGFIX] Fix races in discovery. * [BUGFIX] Fix Kubernetes endpoints SD for empty subsets. * [BUGFIX] Throttle updates from SD providers, which caused increased CPU usage and allocations. * [BUGFIX] Fix TSDB block reload issue. * [BUGFIX] Fix PromQL printing of empty `without()`. * [BUGFIX] Don't reset FiredAt for inactive alerts. * [BUGFIX] Fix erroneous file version changes and repair existing data. ## 2.1.0 / 2018-01-19 * [FEATURE] New Service Discovery UI showing labels before and after relabelling. * [FEATURE] New Admin APIs added to v1 to delete, snapshot and remove tombstones. * [ENHANCEMENT] The graph UI autcomplete now includes your previous queries. * [ENHANCEMENT] Federation is now much faster for large numbers of series. * [ENHANCEMENT] Added new metrics to measure rule timings. * [ENHANCEMENT] Rule evaluation times added to the rules UI. * [ENHANCEMENT] Added metrics to measure modified time of file SD files. * [ENHANCEMENT] Kubernetes SD now includes POD UID in discovery metadata. * [ENHANCEMENT] The Query APIs now return optional stats on query execution times. * [ENHANCEMENT] The index now no longer has the 4GiB size limit and is also smaller. * [BUGFIX] Remote read `read_recent` option is now false by default. * [BUGFIX] Pass the right configuration to each Alertmanager (AM) when using multiple AM configs. * [BUGFIX] Fix not-matchers not selecting series with labels unset. * [BUGFIX] tsdb: Fix occasional panic in head block. * [BUGFIX] tsdb: Close files before deletion to fix retention issues on Windows and NFS. * [BUGFIX] tsdb: Cleanup and do not retry failing compactions. * [BUGFIX] tsdb: Close WAL while shutting down. ## 2.0.0 / 2017-11-08 This release includes a completely rewritten storage, huge performance improvements, but also many backwards incompatible changes. For more information, read the announcement blog post and migration guide. https://prometheus.io/blog/2017/11/08/announcing-prometheus-2-0/ https://prometheus.io/docs/prometheus/2.0/migration/ * [CHANGE] Completely rewritten storage layer, with WAL. This is not backwards compatible with 1.x storage, and many flags have changed/disappeared. * [CHANGE] New staleness behavior. Series now marked stale after target scrapes no longer return them, and soon after targets disappear from service discovery. * [CHANGE] Rules files use YAML syntax now. Conversion tool added to promtool. * [CHANGE] Removed `count_scalar`, `drop_common_labels` functions and `keep_common` modifier from PromQL. * [CHANGE] Rewritten exposition format parser with much higher performance. The Protobuf exposition format is no longer supported. * [CHANGE] Example console templates updated for new storage and metrics names. Examples other than node exporter and Prometheus removed. * [CHANGE] Admin and lifecycle APIs now disabled by default, can be re-enabled via flags * [CHANGE] Flags switched to using Kingpin, all flags are now --flagname rather than -flagname. * [FEATURE/CHANGE] Remote read can be configured to not read data which is available locally. This is enabled by default. * [FEATURE] Rules can be grouped now. Rules within a rule group are executed sequentially. 
* [FEATURE] Added experimental GRPC apis * [FEATURE] Add timestamp() function to PromQL. * [ENHANCEMENT] Remove remote read from the query path if no remote storage is configured. * [ENHANCEMENT] Bump Consul HTTP client timeout to not match the Consul SD watch timeout. * [ENHANCEMENT] Go-conntrack added to provide HTTP connection metrics. * [BUGFIX] Fix connection leak in Consul SD. ## 1.8.2 / 2017-11-04 * [BUGFIX] EC2 service discovery: Do not crash if tags are empty. ## 1.8.1 / 2017-10-19 * [BUGFIX] Correctly handle external labels on remote read endpoint ## 1.8.0 / 2017-10-06 * [CHANGE] Rule links link to the _Console_ tab rather than the _Graph_ tab to not trigger expensive range queries by default. * [FEATURE] Ability to act as a remote read endpoint for other Prometheus servers. * [FEATURE] K8s SD: Support discovery of ingresses. * [FEATURE] Consul SD: Support for node metadata. * [FEATURE] Openstack SD: Support discovery of hypervisors. * [FEATURE] Expose current Prometheus config via `/status/config`. * [FEATURE] Allow to collapse jobs on `/targets` page. * [FEATURE] Add `/-/healthy` and `/-/ready` endpoints. * [FEATURE] Add color scheme support to console templates. * [ENHANCEMENT] Remote storage connections use HTTP keep-alive. * [ENHANCEMENT] Improved logging about remote storage. * [ENHANCEMENT] Relaxed URL validation. * [ENHANCEMENT] Openstack SD: Handle instances without IP. * [ENHANCEMENT] Make remote storage queue manager configurable. * [ENHANCEMENT] Validate metrics returned from remote read. * [ENHANCEMENT] EC2 SD: Set a default region. * [ENHANCEMENT] Changed help link to `https://prometheus.io/docs`. * [BUGFIX] Fix floating-point precision issue in `deriv` function. * [BUGFIX] Fix pprof endpoints when -web.route-prefix or -web.external-url is used. * [BUGFIX] Fix handling of `null` target groups in file-based SD. * [BUGFIX] Set the sample timestamp in date-related PromQL functions. * [BUGFIX] Apply path prefix to redirect from deprecated graph URL. * [BUGFIX] Fixed tests on MS Windows. * [BUGFIX] Check for invalid UTF-8 in label values after relabeling. ## 1.7.2 / 2017-09-26 * [BUGFIX] Correctly remove all targets from DNS service discovery if the corresponding DNS query succeeds and returns an empty result. * [BUGFIX] Correctly parse resolution input in expression browser. * [BUGFIX] Consistently use UTC in the date picker of the expression browser. * [BUGFIX] Correctly handle multiple ports in Marathon service discovery. * [BUGFIX] Fix HTML escaping so that HTML templates compile with Go1.9. * [BUGFIX] Prevent number of remote write shards from going negative. * [BUGFIX] In the graphs created by the expression browser, render very large and small numbers in a readable way. * [BUGFIX] Fix a rarely occurring iterator issue in varbit encoded chunks. ## 1.7.1 / 2017-06-12 * [BUGFIX] Fix double prefix redirect. ## 1.7.0 / 2017-06-06 * [CHANGE] Compress remote storage requests and responses with unframed/raw snappy. * [CHANGE] Properly ellide secrets in config. * [FEATURE] Add OpenStack service discovery. * [FEATURE] Add ability to limit Kubernetes service discovery to certain namespaces. * [FEATURE] Add metric for discovered number of Alertmanagers. * [ENHANCEMENT] Print system information (uname) on start up. * [ENHANCEMENT] Show gaps in graphs on expression browser. * [ENHANCEMENT] Promtool linter checks counter naming and more reserved labels. * [BUGFIX] Fix broken Mesos discovery. * [BUGFIX] Fix redirect when external URL is set. 
* [BUGFIX] Fix mutation of active alert elements by notifier. * [BUGFIX] Fix HTTP error handling for remote write. * [BUGFIX] Fix builds for Solaris/Illumos. * [BUGFIX] Fix overflow checking in global config. * [BUGFIX] Fix log level reporting issue. * [BUGFIX] Fix ZooKeeper serverset discovery can become out-of-sync. ## 1.6.3 / 2017-05-18 * [BUGFIX] Fix disappearing Alertmanger targets in Alertmanager discovery. * [BUGFIX] Fix panic with remote_write on ARMv7. * [BUGFIX] Fix stacked graphs to adapt min/max values. ## 1.6.2 / 2017-05-11 * [BUGFIX] Fix potential memory leak in Kubernetes service discovery ## 1.6.1 / 2017-04-19 * [BUGFIX] Don't panic if storage has no FPs even after initial wait ## 1.6.0 / 2017-04-14 * [CHANGE] Replaced the remote write implementations for various backends by a generic write interface with example adapter implementation for various backends. Note that both the previous and the current remote write implementations are **experimental**. * [FEATURE] New flag `-storage.local.target-heap-size` to tell Prometheus about the desired heap size. This deprecates the flags `-storage.local.memory-chunks` and `-storage.local.max-chunks-to-persist`, which are kept for backward compatibility. * [FEATURE] Add `check-metrics` to `promtool` to lint metric names. * [FEATURE] Add Joyent Triton discovery. * [FEATURE] `X-Prometheus-Scrape-Timeout-Seconds` header in HTTP scrape requests. * [FEATURE] Remote read interface, including example for InfluxDB. **Experimental.** * [FEATURE] Enable Consul SD to connect via TLS. * [FEATURE] Marathon SD supports multiple ports. * [FEATURE] Marathon SD supports bearer token for authentication. * [FEATURE] Custom timeout for queries. * [FEATURE] Expose `buildQueryUrl` in `graph.js`. * [FEATURE] Add `rickshawGraph` property to the graph object in console templates. * [FEATURE] New metrics exported by Prometheus itself: * Summary `prometheus_engine_query_duration_seconds` * Counter `prometheus_evaluator_iterations_missed_total` * Counter `prometheus_evaluator_iterations_total` * Gauge `prometheus_local_storage_open_head_chunks` * Gauge `prometheus_local_storage_target_heap_size` * [ENHANCEMENT] Reduce shut-down time by interrupting an ongoing checkpoint before starting the final checkpoint. * [ENHANCEMENT] Auto-tweak times between checkpoints to limit time spent in checkpointing to 50%. * [ENHANCEMENT] Improved crash recovery deals better with certain index corruptions. * [ENHANCEMENT] Graphing deals better with constant time series. * [ENHANCEMENT] Retry remote writes on recoverable errors. * [ENHANCEMENT] Evict unused chunk descriptors during crash recovery to limit memory usage. * [ENHANCEMENT] Smoother disk usage during series maintenance. * [ENHANCEMENT] Targets on targets page sorted by instance within a job. * [ENHANCEMENT] Sort labels in federation. * [ENHANCEMENT] Set `GOGC=40` by default, which results in much better memory utilization at the price of slightly higher CPU usage. If `GOGC` is set by the user, it is still honored as usual. * [ENHANCEMENT] Close head chunks after being idle for the duration of the configured staleness delta. This helps to persist and evict head chunk of stale series more quickly. * [ENHANCEMENT] Stricter checking of relabel config. * [ENHANCEMENT] Cache busters for static web content. * [ENHANCEMENT] Send Prometheus-specific user-agent header during scrapes. * [ENHANCEMENT] Improved performance of series retention cut-off. 
* [ENHANCEMENT] Mitigate impact of non-atomic sample ingestion on `histogram_quantile` by enforcing buckets to be monotonic. * [ENHANCEMENT] Released binaries built with Go 1.8.1. * [BUGFIX] Send `instance=""` with federation if `instance` not set. * [BUGFIX] Update to new `client_golang` to get rid of unwanted quantile metrics in summaries. * [BUGFIX] Introduce several additional guards against data corruption. * [BUGFIX] Mark storage dirty and increment `prometheus_local_storage_persist_errors_total` on all relevant errors. * [BUGFIX] Propagate storage errors as 500 in the HTTP API. * [BUGFIX] Fix int64 overflow in timestamps in the HTTP API. * [BUGFIX] Fix deadlock in Zookeeper SD. * [BUGFIX] Fix fuzzy search problems in the web-UI auto-completion. ## 1.5.3 / 2017-05-11 * [BUGFIX] Fix potential memory leak in Kubernetes service discovery ## 1.5.2 / 2017-02-10 * [BUGFIX] Fix series corruption in a special case of series maintenance where the minimum series-file-shrink-ratio kicks in. * [BUGFIX] Fix two panic conditions both related to processing a series scheduled to be quarantined. * [ENHANCEMENT] Binaries built with Go1.7.5. ## 1.5.1 / 2017-02-07 * [BUGFIX] Don't lose fully persisted memory series during checkpointing. * [BUGFIX] Fix intermittently failing relabeling. * [BUGFIX] Make `-storage.local.series-file-shrink-ratio` work. * [BUGFIX] Remove race condition from TestLoop. ## 1.5.0 / 2017-01-23 * [CHANGE] Use lexicographic order to sort alerts by name. * [FEATURE] Add Joyent Triton discovery. * [FEATURE] Add scrape targets and alertmanager targets API. * [FEATURE] Add various persistence related metrics. * [FEATURE] Add various query engine related metrics. * [FEATURE] Add ability to limit scrape samples, and related metrics. * [FEATURE] Add labeldrop and labelkeep relabelling actions. * [FEATURE] Display current working directory on status-page. * [ENHANCEMENT] Strictly use ServiceAccount for in cluster configuration on Kubernetes. * [ENHANCEMENT] Various performance and memory-management improvements. * [BUGFIX] Fix basic auth for alertmanagers configured via flag. * [BUGFIX] Don't panic on decoding corrupt data. * [BUGFIX] Ignore dotfiles in data directory. * [BUGFIX] Abort on intermediate federation errors. ## 1.4.1 / 2016-11-28 * [BUGFIX] Fix Consul service discovery ## 1.4.0 / 2016-11-25 * [FEATURE] Allow configuring Alertmanagers via service discovery * [FEATURE] Display used Alertmanagers on runtime page in the UI * [FEATURE] Support profiles in AWS EC2 service discovery configuration * [ENHANCEMENT] Remove duplicated logging of Kubernetes client errors * [ENHANCEMENT] Add metrics about Kubernetes service discovery * [BUGFIX] Update alert annotations on re-evaluation * [BUGFIX] Fix export of group modifier in PromQL queries * [BUGFIX] Remove potential deadlocks in several service discovery implementations * [BUGFIX] Use proper float64 modulo in PromQL `%` binary operations * [BUGFIX] Fix crash bug in Kubernetes service discovery ## 1.3.1 / 2016-11-04 This bug-fix release pulls in the fixes from the 1.2.3 release. * [BUGFIX] Correctly handle empty Regex entry in relabel config. * [BUGFIX] MOD (`%`) operator doesn't panic with small floating point numbers. * [BUGFIX] Updated miekg/dns vendoring to pick up upstream bug fixes. * [ENHANCEMENT] Improved DNS error reporting. ## 1.2.3 / 2016-11-04 Note that this release is chronologically after 1.3.0. * [BUGFIX] Correctly handle end time before start time in range queries. 
* [BUGFIX] Error on negative `-storage.staleness-delta` * [BUGFIX] Correctly handle empty Regex entry in relabel config. * [BUGFIX] MOD (`%`) operator doesn't panic with small floating point numbers. * [BUGFIX] Updated miekg/dns vendoring to pick up upstream bug fixes. * [ENHANCEMENT] Improved DNS error reporting. ## 1.3.0 / 2016-11-01 This is a breaking change to the Kubernetes service discovery. * [CHANGE] Rework Kubernetes SD. * [FEATURE] Add support for interpolating `target_label`. * [FEATURE] Add GCE metadata as Prometheus meta labels. * [ENHANCEMENT] Add EC2 SD metrics. * [ENHANCEMENT] Add Azure SD metrics. * [ENHANCEMENT] Add fuzzy search to `/graph` textarea. * [ENHANCEMENT] Always show instance labels on target page. * [BUGFIX] Validate query end time is not before start time. * [BUGFIX] Error on negative `-storage.staleness-delta` ## 1.2.2 / 2016-10-30 * [BUGFIX] Correctly handle on() in alerts. * [BUGFIX] UI: Deal properly with aborted requests. * [BUGFIX] UI: Decode URL query parameters properly. * [BUGFIX] Storage: Deal better with data corruption (non-monotonic timestamps). * [BUGFIX] Remote storage: Re-add accidentally removed timeout flag. * [BUGFIX] Updated a number of vendored packages to pick up upstream bug fixes. ## 1.2.1 / 2016-10-10 * [BUGFIX] Count chunk evictions properly so that the server doesn't assume it runs out of memory and subsequencly throttles ingestion. * [BUGFIX] Use Go1.7.1 for prebuilt binaries to fix issues on MacOS Sierra. ## 1.2.0 / 2016-10-07 * [FEATURE] Cleaner encoding of query parameters in `/graph` URLs. * [FEATURE] PromQL: Add `minute()` function. * [FEATURE] Add GCE service discovery. * [FEATURE] Allow any valid UTF-8 string as job name. * [FEATURE] Allow disabling local storage. * [FEATURE] EC2 service discovery: Expose `ec2_instance_state`. * [ENHANCEMENT] Various performance improvements in local storage. * [BUGFIX] Zookeeper service discovery: Remove deleted nodes. * [BUGFIX] Zookeeper service discovery: Resync state after Zookeeper failure. * [BUGFIX] Remove JSON from HTTP Accept header. * [BUGFIX] Fix flag validation of Alertmanager URL. * [BUGFIX] Fix race condition on shutdown. * [BUGFIX] Do not fail Consul discovery on Prometheus startup when Consul is down. * [BUGFIX] Handle NaN in `changes()` correctly. * [CHANGE] **Experimental** remote write path: Remove use of gRPC. * [CHANGE] **Experimental** remote write path: Configuration via config file rather than command line flags. * [FEATURE] **Experimental** remote write path: Add HTTP basic auth and TLS. * [FEATURE] **Experimental** remote write path: Support for relabelling. ## 1.1.3 / 2016-09-16 * [ENHANCEMENT] Use golang-builder base image for tests in CircleCI. * [ENHANCEMENT] Added unit tests for federation. * [BUGFIX] Correctly de-dup metric families in federation output. ## 1.1.2 / 2016-09-08 * [BUGFIX] Allow label names that coincide with keywords. ## 1.1.1 / 2016-09-07 * [BUGFIX] Fix IPv6 escaping in service discovery integrations * [BUGFIX] Fix default scrape port assignment for IPv6 ## 1.1.0 / 2016-09-03 * [FEATURE] Add `quantile` and `quantile_over_time`. * [FEATURE] Add `stddev_over_time` and `stdvar_over_time`. * [FEATURE] Add various time and date functions. * [FEATURE] Added `toUpper` and `toLower` formatting to templates. * [FEATURE] Allow relabeling of alerts. * [FEATURE] Allow URLs in targets defined via a JSON file. * [FEATURE] Add idelta function. * [FEATURE] 'Remove graph' button on the /graph page. 
* [FEATURE] Kubernetes SD: Add node name and host IP to pod discovery. * [FEATURE] New remote storage write path. EXPERIMENTAL! * [ENHANCEMENT] Improve time-series index lookups. * [ENHANCEMENT] Forbid invalid relabel configurations. * [ENHANCEMENT] Improved various tests. * [ENHANCEMENT] Add crash recovery metric 'started_dirty'. * [ENHANCEMENT] Fix (and simplify) populating series iterators. * [ENHANCEMENT] Add job link on target page. * [ENHANCEMENT] Message on empty Alerts page. * [ENHANCEMENT] Various internal code refactorings and clean-ups. * [ENHANCEMENT] Various improvements in the build system. * [BUGFIX] Catch errors when unmarshaling delta/doubleDelta encoded chunks. * [BUGFIX] Fix data race in lexer and lexer test. * [BUGFIX] Trim stray whitespace from bearer token file. * [BUGFIX] Avoid divide-by-zero panic on query_range?step=0. * [BUGFIX] Detect invalid rule files at startup. * [BUGFIX] Fix counter reset treatment in PromQL. * [BUGFIX] Fix rule HTML escaping issues. * [BUGFIX] Remove internal labels from alerts sent to AM. ## 1.0.2 / 2016-08-24 * [BUGFIX] Clean up old targets after config reload. ## 1.0.1 / 2016-07-21 * [BUGFIX] Exit with error on non-flag command-line arguments. * [BUGFIX] Update example console templates to new HTTP API. * [BUGFIX] Re-add logging flags. ## 1.0.0 / 2016-07-18 * [CHANGE] Remove deprecated query language keywords * [CHANGE] Change Kubernetes SD to require specifying Kubernetes role * [CHANGE] Use service address in Consul SD if available * [CHANGE] Standardize all Prometheus internal metrics to second units * [CHANGE] Remove unversioned legacy HTTP API * [CHANGE] Remove legacy ingestion of JSON metric format * [CHANGE] Remove deprecated `target_groups` configuration * [FEATURE] Add binary power operation to PromQL * [FEATURE] Add `count_values` aggregator * [FEATURE] Add `-web.route-prefix` flag * [FEATURE] Allow `on()`, `by()`, `without()` in PromQL with empty label sets * [ENHANCEMENT] Make `topk/bottomk` query functions aggregators * [BUGFIX] Fix annotations in alert rule printing * [BUGFIX] Expand alert templating at evaluation time * [BUGFIX] Fix edge case handling in crash recovery * [BUGFIX] Hide testing package flags from help output ## 0.20.0 / 2016-06-15 This release contains multiple breaking changes to the configuration schema. * [FEATURE] Allow configuring multiple Alertmanagers * [FEATURE] Add server name to TLS configuration * [FEATURE] Add labels for all node addresses and discover node port if available in Kubernetes SD * [ENHANCEMENT] More meaningful configuration errors * [ENHANCEMENT] Round scraping timestamps to milliseconds in web UI * [ENHANCEMENT] Make number of storage fingerprint locks configurable * [BUGFIX] Fix date parsing in console template graphs * [BUGFIX] Fix static console files in Docker images * [BUGFIX] Fix console JS XHR requests for IE11 * [BUGFIX] Add missing path prefix in new status page * [CHANGE] Rename `target_groups` to `static_configs` in config files * [CHANGE] Rename `names` to `files` in file SD configuration * [CHANGE] Remove kubelet port config option in Kubernetes SD configuration ## 0.19.3 / 2016-06-14 * [BUGFIX] Handle Marathon apps with zero ports * [BUGFIX] Fix startup panic in retrieval layer ## 0.19.2 / 2016-05-29 * [BUGFIX] Correctly handle `GROUP_LEFT` and `GROUP_RIGHT` without labels in string representation of expressions and in rules. * [BUGFIX] Use `-web.external-url` for new status endpoints. 
## 0.19.1 / 2016-05-25 * [BUGFIX] Handle service discovery panic affecting Kubernetes SD * [BUGFIX] Fix web UI display issue in some browsers ## 0.19.0 / 2016-05-24 This version contains a breaking change to the query language. Please read the documentation on the grouping behavior of vector matching: https://prometheus.io/docs/querying/operators/#vector-matching * [FEATURE] Add experimental Microsoft Azure service discovery * [FEATURE] Add `ignoring` modifier for binary operations * [FEATURE] Add pod discovery to Kubernetes service discovery * [CHANGE] Vector matching takes grouping labels from one-side * [ENHANCEMENT] Support time range on /api/v1/series endpoint * [ENHANCEMENT] Partition status page into individual pages * [BUGFIX] Fix issue of hanging target scrapes ## 0.18.0 / 2016-04-18 * [BUGFIX] Fix operator precedence in PromQL * [BUGFIX] Never drop still open head chunk * [BUGFIX] Fix missing 'keep_common' when printing AST node * [CHANGE/BUGFIX] Target identity considers path and parameters additionally to host and port * [CHANGE] Rename metric `prometheus_local_storage_invalid_preload_requests_total` to `prometheus_local_storage_non_existent_series_matches_total` * [CHANGE] Support for old alerting rule syntax dropped * [FEATURE] Deduplicate targets within the same scrape job * [FEATURE] Add varbit chunk encoding (higher compression, more CPU usage – disabled by default) * [FEATURE] Add `holt_winters` query function * [FEATURE] Add relative complement `unless` operator to PromQL * [ENHANCEMENT] Quarantine series file if data corruption is encountered (instead of crashing) * [ENHANCEMENT] Validate Alertmanager URL * [ENHANCEMENT] Use UTC for build timestamp * [ENHANCEMENT] Improve index query performance (especially for active time series) * [ENHANCEMENT] Instrument configuration reload duration * [ENHANCEMENT] Instrument retrieval layer * [ENHANCEMENT] Add Go version to `prometheus_build_info` metric ## 0.17.0 / 2016-03-02 This version no longer works with Alertmanager 0.0.4 and earlier! The alerting rule syntax has changed as well but the old syntax is supported up until version 0.18. All regular expressions in PromQL are anchored now, matching the behavior of regular expressions in config files. * [CHANGE] Integrate with Alertmanager 0.1.0 and higher * [CHANGE] Degraded storage mode renamed to rushed mode * [CHANGE] New alerting rule syntax * [CHANGE] Add label validation on ingestion * [CHANGE] Regular expression matchers in PromQL are anchored * [FEATURE] Add `without` aggregation modifier * [FEATURE] Send alert resolved notifications to Alertmanager * [FEATURE] Allow millisecond precision in configuration file * [FEATURE] Support AirBnB's Smartstack Nerve for service discovery * [ENHANCEMENT] Storage switches less often between regular and rushed mode. * [ENHANCEMENT] Storage switches into rushed mode if there are too many memory chunks. 
* [ENHANCEMENT] Added more storage instrumentation
* [ENHANCEMENT] Improved instrumentation of notification handler
* [BUGFIX] Do not count head chunks as chunks waiting for persistence
* [BUGFIX] Handle OPTIONS HTTP requests to the API correctly
* [BUGFIX] Parsing of ranges in PromQL fixed
* [BUGFIX] Correctly validate URL flag parameters
* [BUGFIX] Log argument parse errors
* [BUGFIX] Properly handle creation of target with bad TLS config
* [BUGFIX] Fix of checkpoint timing issue

## 0.16.2 / 2016-01-18

* [FEATURE] Multiple authentication options for EC2 discovery added
* [FEATURE] Several meta labels for EC2 discovery added
* [FEATURE] Allow full URLs in static target groups (used e.g. by the `blackbox_exporter`)
* [FEATURE] Add Graphite remote-storage integration
* [FEATURE] Create separate Kubernetes targets for services and their endpoints
* [FEATURE] Add `clamp_{min,max}` functions to PromQL
* [FEATURE] Omitted time parameter in API query defaults to now
* [ENHANCEMENT] Less frequent time series file truncation
* [ENHANCEMENT] Instrument number of manually deleted time series
* [ENHANCEMENT] Ignore lost+found directory during storage version detection
* [CHANGE] Kubernetes `masters` renamed to `api_servers`
* [CHANGE] "Healthy" and "unhealthy" targets are now called "up" and "down" in the web UI
* [CHANGE] Remove undocumented 2nd argument of the `delta` function. (This is a BREAKING CHANGE for users of the undocumented 2nd argument.)
* [BUGFIX] Return proper HTTP status codes on API errors
* [BUGFIX] Fix Kubernetes authentication configuration
* [BUGFIX] Fix stripped OFFSET in rule evaluation and display
* [BUGFIX] Do not crash on failing Consul SD initialization
* [BUGFIX] Revert changes to metric auto-completion
* [BUGFIX] Add config overflow validation for TLS configuration
* [BUGFIX] Skip already watched Zookeeper nodes in serverset SD
* [BUGFIX] Don't federate stale samples
* [BUGFIX] Move NaN to end of result for `topk/bottomk/sort/sort_desc/min/max`
* [BUGFIX] Limit extrapolation of `delta/rate/increase`
* [BUGFIX] Fix unhandled error in rule evaluation

Some changes to the Kubernetes service discovery were integrated since it was released as a beta feature.

## 0.16.1 / 2015-10-16

* [FEATURE] Add `irate()` function.
* [ENHANCEMENT] Improved auto-completion in expression browser.
* [CHANGE] Kubernetes SD moves node label to instance label.
* [BUGFIX] Escape regexes in console templates.

## 0.16.0 / 2015-10-09

BREAKING CHANGES:

* Release tarballs now contain the built binaries in a nested directory.
* The `hash_mod` relabeling action now uses MD5 hashes instead of FNV hashes to achieve a better distribution.
* The DNS-SD meta label `__meta_dns_srv_name` was renamed to `__meta_dns_name` to reflect support for DNS record types other than `SRV`.
* The default full refresh interval for the file-based service discovery has been increased from 30 seconds to 5 minutes.
* In relabeling, parts of a source label that weren't matched by the specified regular expression are no longer included in the replacement output.
* Queries no longer interpolate between two data points. Instead, the resulting value will always be the latest value before the evaluation query timestamp.
* Regular expressions supplied via the configuration are now anchored to match full strings instead of substrings.
* Global labels are not appended upon storing time series anymore. Instead, they are only appended when communicating with external systems (Alertmanager, remote storages, federation).
They have thus also been renamed from `global.labels` to `global.external_labels`. * The names and units of metrics related to remote storage sample appends have been changed. * The experimental support for writing to InfluxDB has been updated to work with InfluxDB 0.9.x. 0.8.x versions of InfluxDB are not supported anymore. * Escape sequences in double- and single-quoted string literals in rules or query expressions are now interpreted like escape sequences in Go string literals (https://golang.org/ref/spec#String_literals). Future breaking changes / deprecated features: * The `delta()` function had an undocumented optional second boolean argument to make it behave like `increase()`. This second argument will be removed in the future. Migrate any occurrences of `delta(x, 1)` to use `increase(x)` instead. * Support for filter operators between two scalar values (like `2 > 1`) will be removed in the future. These will require a `bool` modifier on the operator, e.g. `2 > bool 1`. All changes: * [CHANGE] Renamed `global.labels` to `global.external_labels`. * [CHANGE] Vendoring is now done via govendor instead of godep. * [CHANGE] Change web UI root page to show the graphing interface instead of the server status page. * [CHANGE] Append global labels only when communicating with external systems instead of storing them locally. * [CHANGE] Change all regexes in the configuration to do full-string matches instead of substring matches. * [CHANGE] Remove interpolation of vector values in queries. * [CHANGE] For alert `SUMMARY`/`DESCRIPTION` template fields, cast the alert value to `float64` to work with common templating functions. * [CHANGE] In relabeling, don't include unmatched source label parts in the replacement. * [CHANGE] Change default full refresh interval for the file-based service discovery from 30 seconds to 5 minutes. * [CHANGE] Rename the DNS-SD meta label `__meta_dns_srv_name` to `__meta_dns_name` to reflect support for other record types than `SRV`. * [CHANGE] Release tarballs now contain the binaries in a nested directory. * [CHANGE] Update InfluxDB write support to work with InfluxDB 0.9.x. * [FEATURE] Support full "Go-style" escape sequences in strings and add raw string literals. * [FEATURE] Add EC2 service discovery support. * [FEATURE] Allow configuring TLS options in scrape configurations. * [FEATURE] Add instrumentation around configuration reloads. * [FEATURE] Add `bool` modifier to comparison operators to enable boolean (`0`/`1`) output instead of filtering. * [FEATURE] In Zookeeper serverset discovery, provide `__meta_serverset_shard` label with the serverset shard number. * [FEATURE] Provide `__meta_consul_service_id` meta label in Consul service discovery. * [FEATURE] Allow scalar expressions in recording rules to enable use cases such as building constant metrics. * [FEATURE] Add `label_replace()` and `vector()` query language functions. * [FEATURE] In Consul service discovery, fill in the `__meta_consul_dc` datacenter label from the Consul agent when it's not set in the Consul SD config. * [FEATURE] Scrape all services upon empty services list in Consul service discovery. * [FEATURE] Add `labelmap` relabeling action to map a set of input labels to a set of output labels using regular expressions. * [FEATURE] Introduce `__tmp` as a relabeling label prefix that is guaranteed to not be used by Prometheus internally. * [FEATURE] Kubernetes-based service discovery. * [FEATURE] Marathon-based service discovery. 
* [FEATURE] Support multiple series names in console graphs JavaScript library. * [FEATURE] Allow reloading configuration via web handler at `/-/reload`. * [FEATURE] Updates to promtool to reflect new Prometheus configuration features. * [FEATURE] Add `proxy_url` parameter to scrape configurations to enable use of proxy servers. * [FEATURE] Add console templates for Prometheus itself. * [FEATURE] Allow relabeling the protocol scheme of targets. * [FEATURE] Add `predict_linear()` query language function. * [FEATURE] Support for authentication using bearer tokens, client certs, and CA certs. * [FEATURE] Implement unary expressions for vector types (`-foo`, `+foo`). * [FEATURE] Add console templates for the SNMP exporter. * [FEATURE] Make it possible to relabel target scrape query parameters. * [FEATURE] Add support for `A` and `AAAA` records in DNS service discovery. * [ENHANCEMENT] Fix several flaky tests. * [ENHANCEMENT] Switch to common routing package. * [ENHANCEMENT] Use more resilient metric decoder. * [ENHANCEMENT] Update vendored dependencies. * [ENHANCEMENT] Add compression to more HTTP handlers. * [ENHANCEMENT] Make -web.external-url flag help string more verbose. * [ENHANCEMENT] Improve metrics around remote storage queues. * [ENHANCEMENT] Use Go 1.5.1 instead of Go 1.4.2 in builds. * [ENHANCEMENT] Update the architecture diagram in the `README.md`. * [ENHANCEMENT] Time out sample appends in retrieval layer if the storage is backlogging. * [ENHANCEMENT] Make `hash_mod` relabeling action use MD5 instead of FNV to enable better hash distribution. * [ENHANCEMENT] Better tracking of targets between same service discovery mechanisms in one scrape configuration. * [ENHANCEMENT] Handle parser and query evaluation runtime panics more gracefully. * [ENHANCEMENT] Add IDs to H2 tags on status page to allow anchored linking. * [BUGFIX] Fix watching multiple paths with Zookeeper serverset discovery. * [BUGFIX] Fix high CPU usage on configuration reload. * [BUGFIX] Fix disappearing `__params` on configuration reload. * [BUGFIX] Make `labelmap` action available through configuration. * [BUGFIX] Fix direct access of protobuf fields. * [BUGFIX] Fix panic on Consul request error. * [BUGFIX] Redirect of graph endpoint for prefixed setups. * [BUGFIX] Fix series file deletion behavior when purging archived series. * [BUGFIX] Fix error checking and logging around checkpointing. * [BUGFIX] Fix map initialization in target manager. * [BUGFIX] Fix draining of file watcher events in file-based service discovery. * [BUGFIX] Add `POST` handler for `/debug` endpoints to fix CPU profiling. * [BUGFIX] Fix several flaky tests. * [BUGFIX] Fix busylooping in case a scrape configuration has no target providers defined. * [BUGFIX] Fix exit behavior of static target provider. * [BUGFIX] Fix configuration reloading loop upon shutdown. * [BUGFIX] Add missing check for nil expression in expression parser. * [BUGFIX] Fix error handling bug in test code. * [BUGFIX] Fix Consul port meta label. * [BUGFIX] Fix lexer bug that treated non-Latin Unicode digits as digits. * [CLEANUP] Remove obsolete federation example from console templates. * [CLEANUP] Remove duplicated Bootstrap JS inclusion on graph page. * [CLEANUP] Switch to common log package. * [CLEANUP] Update build environment scripts and Makefiles to work better with native Go build mechanisms and new Go 1.5 experimental vendoring support. * [CLEANUP] Remove logged notice about 0.14.x configuration file format change. 
* [CLEANUP] Move scrape-time metric label modification into SampleAppenders. * [CLEANUP] Switch from `github.com/client_golang/model` to `github.com/common/model` and related type cleanups. * [CLEANUP] Switch from `github.com/client_golang/extraction` to `github.com/common/expfmt` and related type cleanups. * [CLEANUP] Exit Prometheus when the web server encounters a startup error. * [CLEANUP] Remove non-functional alert-silencing links on alerting page. * [CLEANUP] General cleanups to comments and code, derived from `golint`, `go vet`, or otherwise. * [CLEANUP] When entering crash recovery, tell users how to cleanly shut down Prometheus. * [CLEANUP] Remove internal support for multi-statement queries in query engine. * [CLEANUP] Update AUTHORS.md. * [CLEANUP] Don't warn/increment metric upon encountering equal timestamps for the same series upon append. * [CLEANUP] Resolve relative paths during configuration loading. ## 0.15.1 / 2015-07-27 * [BUGFIX] Fix vector matching behavior when there is a mix of equality and non-equality matchers in a vector selector and one matcher matches no series. * [ENHANCEMENT] Allow overriding `GOARCH` and `GOOS` in Makefile.INCLUDE. * [ENHANCEMENT] Update vendored dependencies. ## 0.15.0 / 2015-07-21 BREAKING CHANGES: * Relative paths for rule files are now evaluated relative to the config file. * External reachability flags (`-web.*`) consolidated. * The default storage directory has been changed from `/tmp/metrics` to `data` in the local directory. * The `rule_checker` tool has been replaced by `promtool` with different flags and more functionality. * Empty labels are now removed upon ingestion into the storage. Matching empty labels is now equivalent to matching unset labels (`mymetric{label=""}` now matches series that don't have `label` set at all). * The special `__meta_consul_tags` label in Consul service discovery now starts and ends with tag separators to enable easier regex matching. * The default scrape interval has been changed back from 1 minute to 10 seconds. All changes: * [CHANGE] Change default storage directory to `data` in the current working directory. * [CHANGE] Consolidate external reachability flags (`-web.*`)into one. * [CHANGE] Deprecate `keeping_extra` modifier keyword, rename it to `keep_common`. * [CHANGE] Improve label matching performance and treat unset labels like empty labels in label matchers. * [CHANGE] Remove `rule_checker` tool and add generic `promtool` CLI tool which allows checking rules and configuration files. * [CHANGE] Resolve rule files relative to config file. * [CHANGE] Restore default ScrapeInterval of 1 minute instead of 10 seconds. * [CHANGE] Surround `__meta_consul_tags` value with tag separators. * [CHANGE] Update node disk console for new filesystem labels. * [FEATURE] Add Consul's `ServiceAddress`, `Address`, and `ServicePort` as meta labels to enable setting a custom scrape address if needed. * [FEATURE] Add `hashmod` relabel action to allow for horizontal sharding of Prometheus servers. * [FEATURE] Add `honor_labels` scrape configuration option to not overwrite any labels exposed by the target. * [FEATURE] Add basic federation support on `/federate`. * [FEATURE] Add optional `RUNBOOK` field to alert statements. * [FEATURE] Add pre-relabel target labels to status page. * [FEATURE] Add version information endpoint under `/version`. * [FEATURE] Added initial stable API version 1 under `/api/v1`, including ability to delete series and query more metadata. 
* [FEATURE] Allow configuring query parameters when scraping metrics endpoints. * [FEATURE] Allow deleting time series via the new v1 API. * [FEATURE] Allow individual ingested metrics to be relabeled. * [FEATURE] Allow loading rule files from an entire directory. * [FEATURE] Allow scalar expressions in range queries, improve error messages. * [FEATURE] Support Zookeeper Serversets as a service discovery mechanism. * [ENHANCEMENT] Add circleci yaml for Dockerfile test build. * [ENHANCEMENT] Always show selected graph range, regardless of available data. * [ENHANCEMENT] Change expression input field to multi-line textarea. * [ENHANCEMENT] Enforce strict monotonicity of time stamps within a series. * [ENHANCEMENT] Export build information as metric. * [ENHANCEMENT] Improve UI of `/alerts` page. * [ENHANCEMENT] Improve display of target labels on status page. * [ENHANCEMENT] Improve initialization and routing functionality of web service. * [ENHANCEMENT] Improve target URL handling and display. * [ENHANCEMENT] New dockerfile using alpine-glibc base image and make. * [ENHANCEMENT] Other minor fixes. * [ENHANCEMENT] Preserve alert state across reloads. * [ENHANCEMENT] Prettify flag help output even more. * [ENHANCEMENT] README.md updates. * [ENHANCEMENT] Raise error on unknown config parameters. * [ENHANCEMENT] Refine v1 HTTP API output. * [ENHANCEMENT] Show original configuration file contents on status page instead of serialized YAML. * [ENHANCEMENT] Start HUP signal handler earlier to not exit upon HUP during startup. * [ENHANCEMENT] Updated vendored dependencies. * [BUGFIX] Do not panic in `StringToDuration()` on wrong duration unit. * [BUGFIX] Exit on invalid rule files on startup. * [BUGFIX] Fix a regression in the `.Path` console template variable. * [BUGFIX] Fix chunk descriptor loading. * [BUGFIX] Fix consoles "Prometheus" link to point to / * [BUGFIX] Fix empty configuration file cases * [BUGFIX] Fix float to int conversions in chunk encoding, which were broken for some architectures. * [BUGFIX] Fix overflow detection for serverset config. * [BUGFIX] Fix race conditions in retrieval layer. * [BUGFIX] Fix shutdown deadlock in Consul SD code. * [BUGFIX] Fix the race condition targets in the Makefile. * [BUGFIX] Fix value display error in web console. * [BUGFIX] Hide authentication credentials in config `String()` output. * [BUGFIX] Increment dirty counter metric in storage only if `setDirty(true)` is called. * [BUGFIX] Periodically refresh services in Consul to recover from missing events. * [BUGFIX] Prevent overwrite of default global config when loading a configuration. * [BUGFIX] Properly lex `\r` as whitespace in expression language. * [BUGFIX] Validate label names in JSON target groups. * [BUGFIX] Validate presence of regex field in relabeling configurations. * [CLEANUP] Clean up initialization of remote storage queues. * [CLEANUP] Fix `go vet` and `golint` violations. * [CLEANUP] General cleanup of rules and query language code. * [CLEANUP] Improve and simplify Dockerfile build steps. * [CLEANUP] Improve and simplify build infrastructure, use go-bindata for web assets. Allow building without git. * [CLEANUP] Move all utility packages into common `util` subdirectory. * [CLEANUP] Refactor main, flag handling, and web package. * [CLEANUP] Remove unused methods from `Rule` interface. * [CLEANUP] Simplify default config handling. * [CLEANUP] Switch human-readable times on web UI to UTC. * [CLEANUP] Use `templates.TemplateExpander` for all page templates. 
* [CLEANUP] Use new v1 HTTP API for querying and graphing. ## 0.14.0 / 2015-06-01 * [CHANGE] Configuration format changed and switched to YAML. (See the provided [migration tool](https://github.com/prometheus/migrate/releases).) * [ENHANCEMENT] Redesign of state-preserving target discovery. * [ENHANCEMENT] Allow specifying scrape URL scheme and basic HTTP auth for non-static targets. * [FEATURE] Allow attaching meaningful labels to targets via relabeling. * [FEATURE] Configuration/rule reloading at runtime. * [FEATURE] Target discovery via file watches. * [FEATURE] Target discovery via Consul. * [ENHANCEMENT] Simplified binary operation evaluation. * [ENHANCEMENT] More stable component initialization. * [ENHANCEMENT] Added internal expression testing language. * [BUGFIX] Fix graph links with path prefix. * [ENHANCEMENT] Allow building from source without git. * [ENHANCEMENT] Improve storage iterator performance. * [ENHANCEMENT] Change logging output format and flags. * [BUGFIX] Fix memory alignment bug for 32bit systems. * [ENHANCEMENT] Improve web redirection behavior. * [ENHANCEMENT] Allow overriding default hostname for Prometheus URLs. * [BUGFIX] Fix double slash in URL sent to alertmanager. * [FEATURE] Add resets() query function to count counter resets. * [FEATURE] Add changes() query function to count the number of times a gauge changed. * [FEATURE] Add increase() query function to calculate a counter's increase. * [ENHANCEMENT] Limit retrievable samples to the storage's retention window. ## 0.13.4 / 2015-05-23 * [BUGFIX] Fix a race while checkpointing fingerprint mappings. ## 0.13.3 / 2015-05-11 * [BUGFIX] Handle fingerprint collisions properly. * [CHANGE] Comments in rules file must start with `#`. (The undocumented `//` and `/*...*/` comment styles are no longer supported.) * [ENHANCEMENT] Switch to custom expression language parser and evaluation engine, which generates better error messages, fixes some parsing edge-cases, and enables other future enhancements (like the ones below). * [ENHANCEMENT] Limit maximum number of concurrent queries. * [ENHANCEMENT] Terminate running queries during shutdown. ## 0.13.2 / 2015-05-05 * [MAINTENANCE] Updated vendored dependencies to their newest versions. * [MAINTENANCE] Include rule_checker and console templates in release tarball. * [BUGFIX] Sort NaN as the lowest value. * [ENHANCEMENT] Add square root, stddev and stdvar functions. * [BUGFIX] Use scrape_timeout for scrape timeout, not scrape_interval. * [ENHANCEMENT] Improve chunk and chunkDesc loading, increase performance when reading from disk. * [BUGFIX] Show correct error on wrong DNS response. ## 0.13.1 / 2015-04-09 * [BUGFIX] Treat memory series with zero chunks correctly in series maintenance. * [ENHANCEMENT] Improve readability of usage text even more. ## 0.13.0 / 2015-04-08 * [ENHANCEMENT] Double-delta encoding for chunks, saving typically 40% of space, both in RAM and on disk. * [ENHANCEMENT] Redesign of chunk persistence queuing, increasing performance on spinning disks significantly. * [ENHANCEMENT] Redesign of sample ingestion, increasing ingestion performance. * [FEATURE] Added ln, log2, log10 and exp functions to the query language. * [FEATURE] Experimental write support to InfluxDB. * [FEATURE] Allow custom timestamps in instant query API. * [FEATURE] Configurable path prefix for URLs to support proxies. * [ENHANCEMENT] Increase of rule_checker CLI usability. * [CHANGE] Show special float values as gaps. * [ENHANCEMENT] Made usage output more readable. 
* [ENHANCEMENT] Increased resilience of the storage against data corruption.
* [ENHANCEMENT] Various improvements around chunk encoding.
* [ENHANCEMENT] Nicer formatting of target health table on /status.
* [CHANGE] Rename UNREACHABLE to UNHEALTHY, ALIVE to HEALTHY.
* [BUGFIX] Strip trailing slash in alertmanager URL.
* [BUGFIX] Avoid +InfYs and similar, just display +Inf.
* [BUGFIX] Fixed HTML-escaping at various places.
* [BUGFIX] Fixed special value handling in division and modulo of the query language.
* [BUGFIX] Fix embed-static.sh.
* [CLEANUP] Added initial HTTP API tests.
* [CLEANUP] Misc. other code cleanups.
* [MAINTENANCE] Updated vendored dependencies to their newest versions.

## 0.12.0 / 2015-03-04

* [CHANGE] Use client_golang v0.3.1. THIS CHANGES FINGERPRINTING AND INVALIDATES ALL PERSISTED FINGERPRINTS. You have to wipe your storage to use this or later versions. There is a version guard in place that will prevent you from running Prometheus with the stored data of an older Prometheus.
* [BUGFIX] The change above fixes a weakness in the fingerprinting algorithm.
* [ENHANCEMENT] The change above makes fingerprinting faster and less allocation intensive.
* [FEATURE] OR operator and vector matching options. See docs for details.
* [ENHANCEMENT] Scientific notation and special float values (Inf, NaN) now supported by the expression language.
* [CHANGE] Dockerfile makes Prometheus use the Docker volume to store data (rather than /tmp/metrics).
* [CHANGE] Makefile uses Go 1.4.2.

## 0.11.1 / 2015-02-27

* [BUGFIX] Make series maintenance complete again. (Ever since 0.9.0rc4, or commit 0851945, series would not be archived, chunk descriptors would not be evicted, and stale head chunks would never be closed. This happened due to accidental deletion of a line calling a (well tested :) function.)
* [BUGFIX] Do not double count head chunks read from checkpoint on startup. Also fix a related but less severe bug in counting chunk descriptors.
* [BUGFIX] Check last time in head chunk for head chunk timeout, not first.
* [CHANGE] Update vendoring due to vendoring changes in client_golang.
* [CLEANUP] Code cleanups.
* [ENHANCEMENT] Limit the number of 'dirty' series counted during checkpointing.

## 0.11.0 / 2015-02-23

* [FEATURE] Introduce new metric type Histogram with server-side aggregation.
* [FEATURE] Add offset operator.
* [FEATURE] Add floor, ceil and round functions.
* [CHANGE] Change instance identifiers to be host:port.
* [CHANGE] Dependency management and vendoring changed/improved.
* [CHANGE] Flag name changes to create consistency between various Prometheus binaries.
* [CHANGE] Show unlimited number of metrics in autocomplete.
* [CHANGE] Add query timeout.
* [CHANGE] Remove labels on persist error counter.
* [ENHANCEMENT] Various performance improvements for sample ingestion.
* [ENHANCEMENT] Various Makefile improvements.
* [ENHANCEMENT] Various console template improvements, including proof-of-concept for federation via console templates.
* [ENHANCEMENT] Fix graph JS glitches and simplify graphing code.
* [ENHANCEMENT] Dramatically decrease resources for file embedding.
* [ENHANCEMENT] Crash recovery saves lost series data in 'orphaned' directory.
* [BUGFIX] Fix aggregation grouping key calculation.
* [BUGFIX] Fix Go download path for various architectures.
* [BUGFIX] Fixed the link of the Travis build status image.
* [BUGFIX] Fix Rickshaw/D3 version mismatch.
* [CLEANUP] Various code cleanups.

## 0.10.0 / 2015-01-26

* [CHANGE] More efficient JSON result format in query API.
  This requires up-to-date versions of PromDash and prometheus_cli, too.
* [ENHANCEMENT] Excluded non-minified Bootstrap assets and the Bootstrap maps from embedding into the binary. Those files are only used for debugging, and then you can use -web.use-local-assets. By including fewer files, the RAM usage during compilation is much more manageable.
* [ENHANCEMENT] Help link points to https://prometheus.github.io now.
* [FEATURE] Consoles for haproxy and cloudwatch.
* [BUGFIX] Several fixes to graphs in consoles.
* [CLEANUP] Removed a file size check that did not check anything.

## 0.9.0 / 2015-01-23

* [CHANGE] Reworked command line flags, now more consistent and taking into account needs of the new storage backend (see below).
* [CHANGE] Metric names are dropped after certain transformations.
* [CHANGE] Changed partitioning of summary metrics exported by Prometheus.
* [CHANGE] Got rid of Gerrit as a review tool.
* [CHANGE] 'Tabular' view now the default (rather than 'Graph') to avoid running very expensive queries accidentally.
* [CHANGE] On-disk format for stored samples changed. For upgrading, you have to nuke your old files completely. See "Complete rewrite of the storage layer" below.
* [CHANGE] Removed 2nd argument from `delta`.
* [FEATURE] Added a `deriv` function.
* [FEATURE] Console templates.
* [FEATURE] Added `absent` function.
* [FEATURE] Allow omitting the metric name in queries.
* [BUGFIX] Removed all known race conditions.
* [BUGFIX] Metric mutations now handled correctly in all cases.
* [ENHANCEMENT] Proper double-start protection.
* [ENHANCEMENT] Complete rewrite of the storage layer. Benefits include:
  * Better query performance.
  * More samples in less RAM.
  * Better memory management.
  * Scales up to millions of time series and thousands of samples ingested per second.
  * Purging of obsolete samples much cleaner now, up to completely "forgetting" obsolete time series.
  * Proper instrumentation to diagnose the storage layer with... well... Prometheus.
  * Pure Go implementation, no need for cgo and shared C libraries anymore.
  * Better concurrency.
* [ENHANCEMENT] Copy-on-write semantics in the AST layer.
* [ENHANCEMENT] Switched from Go 1.3 to Go 1.4.
* [ENHANCEMENT] Vendored external dependencies with godeps.
* [ENHANCEMENT] Numerous Web UI improvements, moved to Bootstrap3 and Rickshaw 1.5.1.
* [ENHANCEMENT] Improved Docker integration.
* [ENHANCEMENT] Simplified the Makefile contraption.
* [CLEANUP] Put meta-data files into proper shape (LICENSE, README.md etc.)
* [CLEANUP] Removed all legitimate 'go vet' and 'golint' warnings.
* [CLEANUP] Removed dead code.

## 0.8.0 / 2014-09-04

* [ENHANCEMENT] Stagger scrapes to spread out load.
* [BUGFIX] Correctly quote HTTP Accept header.

## 0.7.0 / 2014-08-06

* [FEATURE] Added new functions: abs(), topk(), bottomk(), drop_common_labels().
* [FEATURE] Let console templates get graph links from expressions.
* [FEATURE] Allow console templates to dynamically include other templates.
* [FEATURE] Template consoles now have access to their URL.
* [BUGFIX] Fixed time() function to return evaluation time, not wallclock time.
* [BUGFIX] Fixed HTTP connection leak when targets returned a non-200 status.
* [BUGFIX] Fixed link to console templates in UI.
* [PERFORMANCE] Removed extra memory copies while scraping targets.
* [ENHANCEMENT] Switched from Go 1.2.1 to Go 1.3.
* [ENHANCEMENT] Made metrics exported by Prometheus itself more consistent.
* [ENHANCEMENT] Removed incremental backoffs for unhealthy targets.
* [ENHANCEMENT] Dockerfile also builds Prometheus support tools now. ## 0.6.0 / 2014-06-30 * [FEATURE] Added console and alert templates support, along with various template functions. * [PERFORMANCE] Much faster and more memory-efficient flushing to disk. * [ENHANCEMENT] Query results are now only logged when debugging. * [ENHANCEMENT] Upgraded to new Prometheus client library for exposing metrics. * [BUGFIX] Samples are now kept in memory until fully flushed to disk. * [BUGFIX] Non-200 target scrapes are now treated as an error. * [BUGFIX] Added installation step for missing dependency to Dockerfile. * [BUGFIX] Removed broken and unused "User Dashboard" link. ## 0.5.0 / 2014-05-28 * [BUGFIX] Fixed next retrieval time display on status page. * [BUGFIX] Updated some variable references in tools subdir. * [FEATURE] Added support for scraping metrics via the new text format. * [PERFORMANCE] Improved label matcher performance. * [PERFORMANCE] Removed JSON indentation in query API, leading to smaller response sizes. * [ENHANCEMENT] Added internal check to verify temporal order of streams. * [ENHANCEMENT] Some internal refactorings. ## 0.4.0 / 2014-04-17 * [FEATURE] Vectors and scalars may now be reversed in binary operations (` `). * [FEATURE] It's possible to shutdown Prometheus via a `/-/quit` web endpoint now. * [BUGFIX] Fix for a deadlock race condition in the memory storage. * [BUGFIX] Mac OS X build fixed. * [BUGFIX] Built from Go 1.2.1, which has internal fixes to race conditions in garbage collection handling. * [ENHANCEMENT] Internal storage interface refactoring that allows building e.g. the `rule_checker` tool without LevelDB dynamic library dependencies. * [ENHANCEMENT] Cleanups around shutdown handling. * [PERFORMANCE] Preparations for better memory reuse during marshaling / unmarshaling. prometheus-2.15.2+ds/CONTRIBUTING.md000066400000000000000000000107351360540074000166340ustar00rootroot00000000000000# Contributing Prometheus uses GitHub to manage reviews of pull requests. * If you are a new contributor see: [Steps to Contribute](#steps-to-contribute) * If you have a trivial fix or improvement, go ahead and create a pull request, addressing (with `@...`) a suitable maintainer of this repository (see [MAINTAINERS.md](MAINTAINERS.md)) in the description of the pull request. * If you plan to do something more involved, first discuss your ideas on our [mailing list](https://groups.google.com/forum/?fromgroups#!forum/prometheus-developers). This will avoid unnecessary work and surely give you and us a good deal of inspiration. Also please see our [non-goals issue](https://github.com/prometheus/docs/issues/149) on areas that the Prometheus community doesn't plan to work on. * Relevant coding style guidelines are the [Go Code Review Comments](https://code.google.com/p/go-wiki/wiki/CodeReviewComments) and the _Formatting and style_ section of Peter Bourgon's [Go: Best Practices for Production Environments](https://peter.bourgon.org/go-in-production/#formatting-and-style). * Be sure to sign off on the [DCO](https://github.com/probot/dco#how-it-works) ## Steps to Contribute Should you wish to work on an issue, please claim it first by commenting on the GitHub issue that you want to work on it. This is to prevent duplicated efforts from contributors on the same issue. Please check the [`low-hanging-fruit`](https://github.com/prometheus/prometheus/issues?q=is%3Aissue+is%3Aopen+label%3A%22low+hanging+fruit%22) label to find issues that are good for getting started. 
If you have questions about one of the issues, with or without the tag, please comment on them and one of the maintainers will clarify it. For a quicker response, contact us over [IRC](https://prometheus.io/community). For complete instructions on how to compile see: [Building From Source](https://github.com/prometheus/prometheus#building-from-source) For quickly compiling and testing your changes do: ``` # For building. go build ./cmd/prometheus/ ./prometheus # For testing. make test # Make sure all the tests pass before you commit and push :) ``` We use [`golangci-lint`](https://github.com/golangci/golangci-lint) for linting the code. If it reports an issue and you think that the warning needs to be disregarded or is a false-positive, you can add a special comment `//nolint:linter1[,linter2,...]` before the offending line. Use this sparingly though, fixing the code to comply with the linter's recommendation is in general the preferred course of action. All our issues are regularly tagged so that you can also filter down the issues involving the components you want to work on. For our labeling policy refer [the wiki page](https://github.com/prometheus/prometheus/wiki/Label-Names-and-Descriptions). ## Pull Request Checklist * Branch from the master branch and, if needed, rebase to the current master branch before submitting your pull request. If it doesn't merge cleanly with master you may be asked to rebase your changes. * Commits should be as small as possible, while ensuring that each commit is correct independently (i.e., each commit should compile and pass tests). * If your patch is not getting reviewed or you need a specific person to review it, you can @-reply a reviewer asking for a review in the pull request or a comment, or you can ask for a review on IRC channel [#prometheus](https://webchat.freenode.net/?channels=#prometheus) on irc.freenode.net (for the easiest start, [join via Riot](https://riot.im/app/#/room/#prometheus:matrix.org)). * Add tests relevant to the fixed bug or new feature. ## Dependency management The Prometheus project uses [Go modules](https://golang.org/cmd/go/#hdr-Modules__module_versions__and_more) to manage dependencies on external packages. This requires a working Go environment with version 1.13 or greater installed. All dependencies are vendored in the `vendor/` directory. To add or update a new dependency, use the `go get` command: ```bash # Pick the latest tagged release. go get example.com/some/module/pkg # Pick a specific version. go get example.com/some/module/pkg@vX.Y.Z ``` Tidy up the `go.mod` and `go.sum` files and copy the new/updated dependency to the `vendor/` directory: ```bash # The GO111MODULE variable can be omitted when the code isn't located in GOPATH. GO111MODULE=on go mod tidy GO111MODULE=on go mod vendor ``` You have to commit the changes to `go.mod`, `go.sum` and the `vendor/` directory before submitting the pull request. 
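
## Example: suppressing a lint warning

As a small illustration of the `//nolint` comment described under [Steps to Contribute](#steps-to-contribute), here is a minimal, hypothetical sketch; the package, helper name, and file handling below are invented for the example and are not part of the Prometheus codebase:

```go
package example

import "os"

// closeQuietly is a hypothetical cleanup helper. The error returned by
// Close is deliberately ignored here, so the errcheck warning is
// suppressed for that single line only.
func closeQuietly(f *os.File) {
	f.Close() //nolint:errcheck
}
```

As noted above, fixing the code so the linter passes is generally preferred; the directive is meant for genuine false positives or deliberate exceptions.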
prometheus-2.15.2+ds/Dockerfile000066400000000000000000000024221360540074000163670ustar00rootroot00000000000000ARG ARCH="amd64" ARG OS="linux" FROM quay.io/prometheus/busybox-${OS}-${ARCH}:latest LABEL maintainer="The Prometheus Authors " ARG ARCH="amd64" ARG OS="linux" COPY .build/${OS}-${ARCH}/prometheus /bin/prometheus COPY .build/${OS}-${ARCH}/promtool /bin/promtool COPY documentation/examples/prometheus.yml /etc/prometheus/prometheus.yml COPY console_libraries/ /usr/share/prometheus/console_libraries/ COPY consoles/ /usr/share/prometheus/consoles/ COPY LICENSE /LICENSE COPY NOTICE /NOTICE COPY npm_licenses.tar.bz2 /npm_licenses.tar.bz2 RUN ln -s /usr/share/prometheus/console_libraries /usr/share/prometheus/consoles/ /etc/prometheus/ RUN mkdir -p /prometheus && \ chown -R nobody:nogroup etc/prometheus /prometheus USER nobody EXPOSE 9090 VOLUME [ "/prometheus" ] WORKDIR /prometheus ENTRYPOINT [ "/bin/prometheus" ] CMD [ "--config.file=/etc/prometheus/prometheus.yml", \ "--storage.tsdb.path=/prometheus", \ "--web.console.libraries=/usr/share/prometheus/console_libraries", \ "--web.console.templates=/usr/share/prometheus/consoles" ] prometheus-2.15.2+ds/LICENSE000066400000000000000000000261351360540074000154110ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. prometheus-2.15.2+ds/MAINTAINERS.md000066400000000000000000000013611360540074000164720ustar00rootroot00000000000000@brian-brazil is the main/default maintainer, some parts of the codebase have other maintainers: * `cmd` * `promtool`: @simonpasquier * `discovery` * `k8s`: @brancz * `documentation` * `prometheus-mixin`: @beorn7 * `storage` * `remote`: @csmarchbanks, @cstyan * `tsdb`: @codesome, @krasi-georgiev * `web` * `ui`: @juliusv * `Makefile` and related build configuration: @simonpasquier, @SuperQ For the sake of brevity all subtrees are not explicitly listed. Due to the size of this repository, the natural changes in focus of maintainers over time, and nuances of where particular features live, this list will always be incomplete and out of date. However the listed maintainer(s) should be able to direct a PR/question to the right person. 
prometheus-2.15.2+ds/Makefile000066400000000000000000000076201360540074000160420ustar00rootroot00000000000000# Copyright 2018 The Prometheus Authors # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # Needs to be defined before including Makefile.common to auto-generate targets DOCKER_ARCHS ?= amd64 armv7 arm64 REACT_APP_PATH = web/ui/react-app REACT_APP_SOURCE_FILES = $(wildcard $(REACT_APP_PATH)/public/* $(REACT_APP_PATH)/src/* $(REACT_APP_PATH)/tsconfig.json) REACT_APP_OUTPUT_DIR = web/ui/static/react REACT_APP_NODE_MODULES_PATH = $(REACT_APP_PATH)/node_modules REACT_APP_NPM_LICENSES_TARBALL = "npm_licenses.tar.bz2" TSDB_PROJECT_DIR = "./tsdb" TSDB_CLI_DIR="$(TSDB_PROJECT_DIR)/cmd/tsdb" TSDB_BIN = "$(TSDB_CLI_DIR)/tsdb" TSDB_BENCHMARK_NUM_METRICS ?= 1000 TSDB_BENCHMARK_DATASET ?= "$(TSDB_PROJECT_DIR)/testdata/20kseries.json" TSDB_BENCHMARK_OUTPUT_DIR ?= "$(TSDB_CLI_DIR)/benchout" include Makefile.common DOCKER_IMAGE_NAME ?= prometheus $(REACT_APP_NODE_MODULES_PATH): $(REACT_APP_PATH)/package.json $(REACT_APP_PATH)/yarn.lock cd $(REACT_APP_PATH) && yarn --frozen-lockfile $(REACT_APP_OUTPUT_DIR): $(REACT_APP_NODE_MODULES_PATH) $(REACT_APP_SOURCE_FILES) @echo ">> building React app" @./scripts/build_react_app.sh .PHONY: assets assets: $(REACT_APP_OUTPUT_DIR) @echo ">> writing assets" # Un-setting GOOS and GOARCH here because the generated Go code is always the same, # but the cached object code is incompatible between architectures and OSes (which # breaks cross-building for different combinations on CI in the same container). 
cd web/ui && GO111MODULE=$(GO111MODULE) GOOS= GOARCH= $(GO) generate -x -v $(GOOPTS) @$(GOFMT) -w ./web/ui .PHONY: react-app-lint react-app-lint: @echo ">> running React app linting" cd $(REACT_APP_PATH) && yarn lint:ci .PHONY: react-app-lint-fix react-app-lint-fix: @echo ">> running React app linting and fixing errors where possibe" cd $(REACT_APP_PATH) && yarn lint .PHONY: react-app-test react-app-test: | $(REACT_APP_NODE_MODULES_PATH) react-app-lint @echo ">> running React app tests" cd $(REACT_APP_PATH) && yarn test --no-watch --coverage .PHONY: test test: common-test react-app-test .PHONY: npm_licenses npm_licenses: $(REACT_APP_NODE_MODULES_PATH) @echo ">> bundling npm licenses" rm -f $(REACT_APP_NPM_LICENSES_TARBALL) find $(REACT_APP_NODE_MODULES_PATH) -iname "license*" | tar cfj $(REACT_APP_NPM_LICENSES_TARBALL) --transform 's/^/npm_licenses\//' --files-from=- .PHONY: tarball tarball: npm_licenses common-tarball .PHONY: docker docker: npm_licenses common-docker .PHONY: build build: assets common-build .PHONY: build_tsdb build_tsdb: GO111MODULE=$(GO111MODULE) $(GO) build -o $(TSDB_BIN) $(TSDB_CLI_DIR) .PHONY: bench_tsdb bench_tsdb: build_tsdb @echo ">> running benchmark, writing result to $(TSDB_BENCHMARK_OUTPUT_DIR)" @$(TSDB_BIN) bench write --metrics=$(TSDB_BENCHMARK_NUM_METRICS) --out=$(TSDB_BENCHMARK_OUTPUT_DIR) $(TSDB_BENCHMARK_DATASET) @$(GO) tool pprof -svg $(TSDB_BIN) $(TSDB_BENCHMARK_OUTPUT_DIR)/cpu.prof > $(TSDB_BENCHMARK_OUTPUT_DIR)/cpuprof.svg @$(GO) tool pprof --inuse_space -svg $(TSDB_BIN) $(TSDB_BENCHMARK_OUTPUT_DIR)/mem.prof > $(TSDB_BENCHMARK_OUTPUT_DIR)/memprof.inuse.svg @$(GO) tool pprof --alloc_space -svg $(TSDB_BIN) $(TSDB_BENCHMARK_OUTPUT_DIR)/mem.prof > $(TSDB_BENCHMARK_OUTPUT_DIR)/memprof.alloc.svg @$(GO) tool pprof -svg $(TSDB_BIN) $(TSDB_BENCHMARK_OUTPUT_DIR)/block.prof > $(TSDB_BENCHMARK_OUTPUT_DIR)/blockprof.svg @$(GO) tool pprof -svg $(TSDB_BIN) $(TSDB_BENCHMARK_OUTPUT_DIR)/mutex.prof > $(TSDB_BENCHMARK_OUTPUT_DIR)/mutexprof.svg prometheus-2.15.2+ds/Makefile.common000066400000000000000000000217771360540074000173420ustar00rootroot00000000000000# Copyright 2018 The Prometheus Authors # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # A common Makefile that includes rules to be reused in different prometheus projects. # !!! Open PRs only against the prometheus/prometheus/Makefile.common repository! # Example usage : # Create the main Makefile in the root project directory. 
# include Makefile.common # customTarget: # @echo ">> Running customTarget" # # Ensure GOBIN is not set during build so that promu is installed to the correct path unexport GOBIN GO ?= go GOFMT ?= $(GO)fmt FIRST_GOPATH := $(firstword $(subst :, ,$(shell $(GO) env GOPATH))) GOOPTS ?= GOHOSTOS ?= $(shell $(GO) env GOHOSTOS) GOHOSTARCH ?= $(shell $(GO) env GOHOSTARCH) GO_VERSION ?= $(shell $(GO) version) GO_VERSION_NUMBER ?= $(word 3, $(GO_VERSION)) PRE_GO_111 ?= $(shell echo $(GO_VERSION_NUMBER) | grep -E 'go1\.(10|[0-9])\.') GOVENDOR := GO111MODULE := ifeq (, $(PRE_GO_111)) ifneq (,$(wildcard go.mod)) # Enforce Go modules support just in case the directory is inside GOPATH (and for Travis CI). GO111MODULE := on ifneq (,$(wildcard vendor)) # Always use the local vendor/ directory to satisfy the dependencies. GOOPTS := $(GOOPTS) -mod=vendor endif endif else ifneq (,$(wildcard go.mod)) ifneq (,$(wildcard vendor)) $(warning This repository requires Go >= 1.11 because of Go modules) $(warning Some recipes may not work as expected as the current Go runtime is '$(GO_VERSION_NUMBER)') endif else # This repository isn't using Go modules (yet). GOVENDOR := $(FIRST_GOPATH)/bin/govendor endif endif PROMU := $(FIRST_GOPATH)/bin/promu pkgs = ./... ifeq (arm, $(GOHOSTARCH)) GOHOSTARM ?= $(shell GOARM= $(GO) env GOARM) GO_BUILD_PLATFORM ?= $(GOHOSTOS)-$(GOHOSTARCH)v$(GOHOSTARM) else GO_BUILD_PLATFORM ?= $(GOHOSTOS)-$(GOHOSTARCH) endif PROMU_VERSION ?= 0.5.0 PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_VERSION)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM).tar.gz GOLANGCI_LINT := GOLANGCI_LINT_OPTS ?= GOLANGCI_LINT_VERSION ?= v1.18.0 # golangci-lint only supports linux, darwin and windows platforms on i386/amd64. # windows isn't included here because of the path separator being different. ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin)) ifeq ($(GOHOSTARCH),$(filter $(GOHOSTARCH),amd64 i386)) GOLANGCI_LINT := $(FIRST_GOPATH)/bin/golangci-lint endif endif PREFIX ?= $(shell pwd) BIN_DIR ?= $(shell pwd) DOCKER_IMAGE_TAG ?= $(subst /,-,$(shell git rev-parse --abbrev-ref HEAD)) DOCKERFILE_PATH ?= ./Dockerfile DOCKERBUILD_CONTEXT ?= ./ DOCKER_REPO ?= prom DOCKER_ARCHS ?= amd64 BUILD_DOCKER_ARCHS = $(addprefix common-docker-,$(DOCKER_ARCHS)) PUBLISH_DOCKER_ARCHS = $(addprefix common-docker-publish-,$(DOCKER_ARCHS)) TAG_DOCKER_ARCHS = $(addprefix common-docker-tag-latest-,$(DOCKER_ARCHS)) ifeq ($(GOHOSTARCH),amd64) ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux freebsd darwin windows)) # Only supported on amd64 test-flags := -race endif endif # This rule is used to forward a target like "build" to "common-build". This # allows a new "build" target to be defined in a Makefile which includes this # one and override "common-build" without override warnings. %: common-% ; .PHONY: common-all common-all: precheck style check_license lint unused build test .PHONY: common-style common-style: @echo ">> checking code style" @fmtRes=$$($(GOFMT) -d $$(find . -path ./vendor -prune -o -name '*.go' -print)); \ if [ -n "$${fmtRes}" ]; then \ echo "gofmt checking failed!"; echo "$${fmtRes}"; echo; \ echo "Please ensure you are using $$($(GO) version) for formatting code."; \ exit 1; \ fi .PHONY: common-check_license common-check_license: @echo ">> checking license header" @licRes=$$(for file in $$(find . -type f -iname '*.go' ! 
-path './vendor/*') ; do \ awk 'NR<=3' $$file | grep -Eq "(Copyright|generated|GENERATED)" || echo $$file; \ done); \ if [ -n "$${licRes}" ]; then \ echo "license header checking failed:"; echo "$${licRes}"; \ exit 1; \ fi .PHONY: common-deps common-deps: @echo ">> getting dependencies" ifdef GO111MODULE GO111MODULE=$(GO111MODULE) $(GO) mod download else $(GO) get $(GOOPTS) -t ./... endif .PHONY: common-test-short common-test-short: @echo ">> running short tests" GO111MODULE=$(GO111MODULE) $(GO) test -short $(GOOPTS) $(pkgs) .PHONY: common-test common-test: @echo ">> running all tests" GO111MODULE=$(GO111MODULE) $(GO) test $(test-flags) $(GOOPTS) $(pkgs) .PHONY: common-format common-format: @echo ">> formatting code" GO111MODULE=$(GO111MODULE) $(GO) fmt $(pkgs) .PHONY: common-vet common-vet: @echo ">> vetting code" GO111MODULE=$(GO111MODULE) $(GO) vet $(GOOPTS) $(pkgs) .PHONY: common-lint common-lint: $(GOLANGCI_LINT) ifdef GOLANGCI_LINT @echo ">> running golangci-lint" ifdef GO111MODULE # 'go list' needs to be executed before staticcheck to prepopulate the modules cache. # Otherwise staticcheck might fail randomly for some reason not yet explained. GO111MODULE=$(GO111MODULE) $(GO) list -e -compiled -test=true -export=false -deps=true -find=false -tags= -- ./... > /dev/null GO111MODULE=$(GO111MODULE) $(GOLANGCI_LINT) run $(GOLANGCI_LINT_OPTS) $(pkgs) else $(GOLANGCI_LINT) run $(pkgs) endif endif # For backward-compatibility. .PHONY: common-staticcheck common-staticcheck: lint .PHONY: common-unused common-unused: $(GOVENDOR) ifdef GOVENDOR @echo ">> running check for unused packages" @$(GOVENDOR) list +unused | grep . && exit 1 || echo 'No unused packages' else ifdef GO111MODULE @echo ">> running check for unused/missing packages in go.mod" GO111MODULE=$(GO111MODULE) $(GO) mod tidy ifeq (,$(wildcard vendor)) @git diff --exit-code -- go.sum go.mod else @echo ">> running check for unused packages in vendor/" GO111MODULE=$(GO111MODULE) $(GO) mod vendor @git diff --exit-code -- go.sum go.mod vendor/ endif endif endif .PHONY: common-build common-build: promu @echo ">> building binaries" GO111MODULE=$(GO111MODULE) $(PROMU) build --prefix $(PREFIX) $(PROMU_BINARIES) .PHONY: common-tarball common-tarball: promu @echo ">> building release tarball" $(PROMU) tarball --prefix $(PREFIX) $(BIN_DIR) .PHONY: common-docker $(BUILD_DOCKER_ARCHS) common-docker: $(BUILD_DOCKER_ARCHS) $(BUILD_DOCKER_ARCHS): common-docker-%: docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(DOCKER_IMAGE_TAG)" \ -f $(DOCKERFILE_PATH) \ --build-arg ARCH="$*" \ --build-arg OS="linux" \ $(DOCKERBUILD_CONTEXT) .PHONY: common-docker-publish $(PUBLISH_DOCKER_ARCHS) common-docker-publish: $(PUBLISH_DOCKER_ARCHS) $(PUBLISH_DOCKER_ARCHS): common-docker-publish-%: docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(DOCKER_IMAGE_TAG)" .PHONY: common-docker-tag-latest $(TAG_DOCKER_ARCHS) common-docker-tag-latest: $(TAG_DOCKER_ARCHS) $(TAG_DOCKER_ARCHS): common-docker-tag-latest-%: docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:latest" .PHONY: common-docker-manifest common-docker-manifest: DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)" $(foreach ARCH,$(DOCKER_ARCHS),$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$(ARCH):$(DOCKER_IMAGE_TAG)) DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)" .PHONY: promu promu: $(PROMU) 
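# The $(PROMU) rule below bootstraps the promu build tool: it downloads the
# release tarball pinned by PROMU_VERSION for the host platform, unpacks it
# into a temporary directory, copies the promu binary into $(FIRST_GOPATH)/bin,
# and removes the temporary directory again.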
$(PROMU): $(eval PROMU_TMP := $(shell mktemp -d)) curl -s -L $(PROMU_URL) | tar -xvzf - -C $(PROMU_TMP) mkdir -p $(FIRST_GOPATH)/bin cp $(PROMU_TMP)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM)/promu $(FIRST_GOPATH)/bin/promu rm -r $(PROMU_TMP) .PHONY: proto proto: @echo ">> generating code from proto files" @./scripts/genproto.sh ifdef GOLANGCI_LINT $(GOLANGCI_LINT): mkdir -p $(FIRST_GOPATH)/bin curl -sfL https://raw.githubusercontent.com/golangci/golangci-lint/$(GOLANGCI_LINT_VERSION)/install.sh \ | sed -e '/install -d/d' \ | sh -s -- -b $(FIRST_GOPATH)/bin $(GOLANGCI_LINT_VERSION) endif ifdef GOVENDOR .PHONY: $(GOVENDOR) $(GOVENDOR): GOOS= GOARCH= $(GO) get -u github.com/kardianos/govendor endif .PHONY: precheck precheck:: define PRECHECK_COMMAND_template = precheck:: $(1)_precheck PRECHECK_COMMAND_$(1) ?= $(1) $$(strip $$(PRECHECK_OPTIONS_$(1))) .PHONY: $(1)_precheck $(1)_precheck: @if ! $$(PRECHECK_COMMAND_$(1)) 1>/dev/null 2>&1; then \ echo "Execution of '$$(PRECHECK_COMMAND_$(1))' command failed. Is $(1) installed?"; \ exit 1; \ fi endef prometheus-2.15.2+ds/NOTICE000066400000000000000000000061601360540074000153040ustar00rootroot00000000000000The Prometheus systems and service monitoring server Copyright 2012-2015 The Prometheus Authors This product includes software developed at SoundCloud Ltd. (https://soundcloud.com/). The following components are included in this product: Bootstrap https://getbootstrap.com Copyright 2011-2014 Twitter, Inc. Licensed under the MIT License bootstrap3-typeahead.js https://github.com/bassjobsen/Bootstrap-3-Typeahead Original written by @mdo and @fat Copyright 2014 Bass Jobsen @bassjobsen Licensed under the Apache License, Version 2.0 fuzzy https://github.com/mattyork/fuzzy Original written by @mattyork Copyright 2012 Matt York Licensed under the MIT License bootstrap-datetimepicker.js https://github.com/Eonasdan/bootstrap-datetimepicker Copyright 2015 Jonathan Peterson (@Eonasdan) Licensed under the MIT License moment.js https://github.com/moment/moment/ Copyright JS Foundation and other contributors Licensed under the MIT License Rickshaw https://github.com/shutterstock/rickshaw Copyright 2011-2014 by Shutterstock Images, LLC See https://github.com/shutterstock/rickshaw/blob/master/LICENSE for license details mustache.js https://github.com/janl/mustache.js Copyright 2009 Chris Wanstrath (Ruby) Copyright 2010-2014 Jan Lehnardt (JavaScript) Copyright 2010-2015 The mustache.js community Licensed under the MIT License jQuery https://jquery.org Copyright jQuery Foundation and other contributors Licensed under the MIT License Protocol Buffers for Go with Gadgets https://github.com/gogo/protobuf/ Copyright (c) 2013, The GoGo Authors. See source code for license details. Go support for leveled logs, analogous to https://code.google.com/p/google-glog/ Copyright 2013 Google Inc. Licensed under the Apache License, Version 2.0 Support for streaming Protocol Buffer messages for the Go language (golang). https://github.com/matttproud/golang_protobuf_extensions Copyright 2013 Matt T. Proud Licensed under the Apache License, Version 2.0 DNS library in Go https://miek.nl/2014/august/16/go-dns-package/ Copyright 2009 The Go Authors, 2011 Miek Gieben See https://github.com/miekg/dns/blob/master/LICENSE for license details. LevelDB key/value database in Go https://github.com/syndtr/goleveldb Copyright 2012 Suryandaru Triandana See https://github.com/syndtr/goleveldb/blob/master/LICENSE for license details. 
gosnappy - a fork of code.google.com/p/snappy-go https://github.com/syndtr/gosnappy Copyright 2011 The Snappy-Go Authors See https://github.com/syndtr/gosnappy/blob/master/LICENSE for license details. go-zookeeper - Native ZooKeeper client for Go https://github.com/samuel/go-zookeeper Copyright (c) 2013, Samuel Stauffer See https://github.com/samuel/go-zookeeper/blob/master/LICENSE for license details. We also use code from a large number of npm packages. For details, see: - https://github.com/prometheus/prometheus/blob/master/web/ui/react-app/package.json - https://github.com/prometheus/prometheus/blob/master/web/ui/react-app/package-lock.json - The individual package licenses as copied from the node_modules directory can be found in the npm_licenses.tar.bz2 archive in release tarballs and Docker images. prometheus-2.15.2+ds/README.md000066400000000000000000000131371360540074000156610ustar00rootroot00000000000000# Prometheus [![CircleCI](https://circleci.com/gh/prometheus/prometheus/tree/master.svg?style=shield)][circleci] [![Docker Repository on Quay](https://quay.io/repository/prometheus/prometheus/status)][quay] [![Docker Pulls](https://img.shields.io/docker/pulls/prom/prometheus.svg?maxAge=604800)][hub] [![Go Report Card](https://goreportcard.com/badge/github.com/prometheus/prometheus)](https://goreportcard.com/report/github.com/prometheus/prometheus) [![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/486/badge)](https://bestpractices.coreinfrastructure.org/projects/486) [![fuzzit](https://app.fuzzit.dev/badge?org_id=prometheus&branch=master)](https://fuzzit.dev) Visit [prometheus.io](https://prometheus.io) for the full documentation, examples and guides. Prometheus, a [Cloud Native Computing Foundation](https://cncf.io/) project, is a systems and service monitoring system. It collects metrics from configured targets at given intervals, evaluates rule expressions, displays the results, and can trigger alerts if some condition is observed to be true. Prometheus's main distinguishing features as compared to other monitoring systems are: - a **multi-dimensional** data model (timeseries defined by metric name and set of key/value dimensions) - a **flexible query language** to leverage this dimensionality - no dependency on distributed storage; **single server nodes are autonomous** - timeseries collection happens via a **pull model** over HTTP - **pushing timeseries** is supported via an intermediary gateway - targets are discovered via **service discovery** or **static configuration** - multiple modes of **graphing and dashboarding support** - support for hierarchical and horizontal **federation** ## Architecture overview ![](https://cdn.jsdelivr.net/gh/prometheus/prometheus@c34257d069c630685da35bcef084632ffd5d6209/documentation/images/architecture.svg) ## Install There are various ways of installing Prometheus. ### Precompiled binaries Precompiled binaries for released versions are available in the [*download* section](https://prometheus.io/download/) on [prometheus.io](https://prometheus.io). Using the latest production release binary is the recommended way of installing Prometheus. See the [Installing](https://prometheus.io/docs/introduction/install/) chapter in the documentation for all the details. Debian packages [are available](https://packages.debian.org/sid/net/prometheus). ### Docker images Docker images are available on [Quay.io](https://quay.io/repository/prometheus/prometheus) or [Docker Hub](https://hub.docker.com/r/prom/prometheus/). 
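In addition to the quick-start command shown below, you can point the container at your own configuration by bind-mounting it. A minimal sketch, assuming a `prometheus.yml` in the current working directory (the image reads its configuration from `/etc/prometheus/prometheus.yml` by default):

    $ docker run -d -p 127.0.0.1:9090:9090 \
        -v $(pwd)/prometheus.yml:/etc/prometheus/prometheus.yml \
        prom/prometheus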
You can launch a Prometheus container for trying it out with $ docker run --name prometheus -d -p 127.0.0.1:9090:9090 prom/prometheus Prometheus will now be reachable at http://localhost:9090/. ### Building from source To build Prometheus from the source code yourself you need to have a working Go environment with [version 1.13 or greater installed](https://golang.org/doc/install). You will also need to have [Node.js](https://nodejs.org/) and [Yarn](https://yarnpkg.com/) installed in order to build the frontend assets. You can directly use the `go` tool to download and install the `prometheus` and `promtool` binaries into your `GOPATH`: $ go get github.com/prometheus/prometheus/cmd/... $ prometheus --config.file=your_config.yml *However*, when using `go get` to build Prometheus, Prometheus will expect to be able to read its web assets from local filesystem directories under `web/ui/static` and `web/ui/templates`. In order for these assets to be found, you will have to run Prometheus from the root of the cloned repository. Note also that these directories do not include the new experimental React UI unless it has been built explicitly using `make assets` or `make build`. An example of the above configuration file can be found [here.](https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus.yml) You can also clone the repository yourself and build using `make build`, which will compile in the web assets so that Prometheus can be run from anywhere: $ mkdir -p $GOPATH/src/github.com/prometheus $ cd $GOPATH/src/github.com/prometheus $ git clone https://github.com/prometheus/prometheus.git $ cd prometheus $ make build $ ./prometheus --config.file=your_config.yml The Makefile provides several targets: * *build*: build the `prometheus` and `promtool` binaries (includes building and compiling in web assets) * *test*: run the tests * *test-short*: run the short tests * *format*: format the source code * *vet*: check the source code for common errors * *docker*: build a docker container for the current `HEAD` ## React UI Development For more information on building, running, and developing on the new React-based UI, see the React app's [README.md](https://github.com/prometheus/prometheus/blob/master/web/ui/react-app/README.md). ## More information * The source code is periodically indexed: [Prometheus Core](https://godoc.org/github.com/prometheus/prometheus). * You will find a CircleCI configuration in `.circleci/config.yml`. * See the [Community page](https://prometheus.io/community) for how to reach the Prometheus developers and users on various communication channels. ## Contributing Refer to [CONTRIBUTING.md](https://github.com/prometheus/prometheus/blob/master/CONTRIBUTING.md) ## License Apache License 2.0, see [LICENSE](https://github.com/prometheus/prometheus/blob/master/LICENSE). [hub]: https://hub.docker.com/r/prom/prometheus/ [circleci]: https://circleci.com/gh/prometheus/prometheus [quay]: https://quay.io/repository/prometheus/prometheus prometheus-2.15.2+ds/RELEASE.md000066400000000000000000000202431360540074000160000ustar00rootroot00000000000000# Releases This page describes the release process and the currently planned schedule for upcoming releases as well as the respective release shepherd. Release shepherds are chosen on a voluntary basis. ## Release schedule Release cadence of first pre-releases being cut is 6 weeks. 
| release series | date of first pre-release (year-month-day) | release shepherd                            |
|----------------|--------------------------------------------|---------------------------------------------|
| v2.4           | 2018-09-06                                 | Goutham Veeramachaneni (GitHub: @gouthamve) |
| v2.5           | 2018-10-24                                 | Frederic Branczyk (GitHub: @brancz)         |
| v2.6           | 2018-12-05                                 | Simon Pasquier (GitHub: @simonpasquier)     |
| v2.7           | 2019-01-16                                 | Goutham Veeramachaneni (GitHub: @gouthamve) |
| v2.8           | 2019-02-27                                 | Ganesh Vernekar (GitHub: @codesome)         |
| v2.9           | 2019-04-10                                 | Brian Brazil (GitHub: @brian-brazil)        |
| v2.10          | 2019-05-22                                 | Björn Rabenstein (GitHub: @beorn7)          |
| v2.11          | 2019-07-03                                 | Frederic Branczyk (GitHub: @brancz)         |
| v2.12          | 2019-08-14                                 | Julius Volz (GitHub: @juliusv)              |
| v2.13          | 2019-09-25                                 | Krasi Georgiev (GitHub: @krasi-georgiev)    |
| v2.14          | 2019-11-06                                 | Chris Marchbanks (GitHub: @csmarchbanks)    |
| v2.15          | 2019-12-18                                 | Bartek Plotka (GitHub: @bwplotka)           |
| v2.16          | 2020-01-29                                 | **searching for volunteer**                 |

If you are interested in volunteering, please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice.

## Release shepherd responsibilities

The release shepherd is responsible for the entire release series of a minor release, meaning all pre- and patch releases of a minor release. The process formally starts with the initial pre-release, but some preparations should be done a few days in advance.

* We aim to keep the master branch in a working state at all times. In principle, it should be possible to cut a release from master at any time. In practice, things might not work out as nicely. A few days before the pre-release is scheduled, the shepherd should check the state of master. Following their best judgement, the shepherd should try to expedite bug fixes that are still in progress but should make it into the release. On the other hand, the shepherd may hold back merging last-minute invasive and risky changes that are better suited for the next minor release.
* On the date listed in the table above, the release shepherd cuts the first pre-release (using the suffix `-rc.0`) and creates a new branch called `release-<major>.<minor>` starting at the commit tagged for the pre-release. In general, a pre-release is considered a release candidate (that's what `rc` stands for) and should therefore not contain any known bugs that are planned to be fixed in the final release.
* With the pre-release, the release shepherd is responsible for running and monitoring a benchmark run of the pre-release for 3 days, after which, if successful, the pre-release is promoted to a stable release.
* If regressions or critical bugs are detected, they need to get fixed before cutting a new pre-release (called `-rc.1`, `-rc.2`, etc.).

See the next section for details on cutting an individual release.

## How to cut an individual release

These instructions are currently valid for the Prometheus server, i.e. the [prometheus/prometheus repository](https://github.com/prometheus/prometheus). Applicability to other Prometheus repositories depends on the current state of each repository. We aspire to unify the release procedures as much as possible.

### Branch management and versioning strategy

We use [Semantic Versioning](https://semver.org/).

We maintain a separate branch for each minor release, named `release-<major>.<minor>`, e.g. `release-1.1`, `release-2.0`.
The usual flow is to merge new features and changes into the master branch and to merge bug fixes into the latest release branch. Bug fixes are then merged into master from the latest release branch. The master branch should always contain all commits from the latest release branch. As long as master hasn't deviated from the release branch, new commits can also go to master, followed by merging master back into the release branch.

If a bug fix got accidentally merged into master after non-bug-fix changes in master, the bug-fix commits have to be cherry-picked into the release branch, which then has to be merged back into master. Try to avoid that situation.

Maintaining the release branches for older minor releases happens on a best effort basis.

### Prepare your release

For a patch release, work in the branch of the minor release you want to patch.

For a new major or minor release, create the corresponding release branch based on the master branch.

Bump the version in the `VERSION` file and update `CHANGELOG.md`. Do this in a proper PR pointing to the release branch as this gives others the opportunity to chime in on the release in general and on the addition to the changelog in particular.

Note that `CHANGELOG.md` should only document changes relevant to users of Prometheus, including external API changes, performance improvements, and new features. Do not document changes of internal interfaces, code refactorings and clean-ups, changes to the build process, etc. People interested in these are asked to refer to the git history.

Entries in the `CHANGELOG.md` are meant to be in this order:

* `[CHANGE]`
* `[FEATURE]`
* `[ENHANCEMENT]`
* `[BUGFIX]`

### Draft the new release

Tag the new release with a tag named `v<major>.<minor>.<patch>`, e.g. `v2.1.3`. Note the `v` prefix. You can do the tagging on the commandline:

```bash
$ tag=$(< VERSION)
$ git tag -s "v${tag}" -m "v${tag}"
$ git push --tags
```

Signing a tag with a GPG key is appreciated, but if you can't add a GPG key to your GitHub account using the following [procedure](https://help.github.com/articles/generating-a-gpg-key/), you can replace the `-s` flag with the `-a` flag of the `git tag` command to only annotate the tag without signing.

Once a tag is created, the release process through CircleCI will be triggered for this tag, and CircleCI will draft the GitHub release using the `prombot` account.

Now all you can do is wait for tarballs to be uploaded to the GitHub release and the container images to be pushed to Docker Hub and Quay.io. Once that has happened, click _Publish release_, which will make the release publicly visible and create a GitHub notification.

### Wrapping up

If the release has happened in the latest release branch, merge the changes into master.

To update the docs, a PR needs to be created to `prometheus/docs`. See [this PR](https://github.com/prometheus/docs/pull/952/files) for inspiration (note: only actually merge this for final releases, not for pre-releases like a release candidate).

Once the binaries have been uploaded, announce the release on `prometheus-announce@googlegroups.com`. (Please do not use `prometheus-users@googlegroups.com` for announcements anymore.) Check out previous announcement mails for inspiration.

### Pre-releases

The following changes to the above procedures apply:

* In line with [Semantic Versioning](https://semver.org/), append something like `-rc.0` to the version (with the corresponding changes to the tag name, the release name etc.).
* Tick the _This is a pre-release_ box when drafting the release in the Github UI. * Still update `CHANGELOG.md`, but when you cut the final release later, merge all the changes from the pre-releases into the one final update. * Run the benchmark for 3 days using the `/benchmark x.y.z` command, `x.y.z` being the latest stable patch release of the previous minor release series. prometheus-2.15.2+ds/VERSION000066400000000000000000000000071360540074000154420ustar00rootroot000000000000002.15.2 prometheus-2.15.2+ds/cmd/000077500000000000000000000000001360540074000151405ustar00rootroot00000000000000prometheus-2.15.2+ds/cmd/prometheus/000077500000000000000000000000001360540074000173335ustar00rootroot00000000000000prometheus-2.15.2+ds/cmd/prometheus/main.go000066400000000000000000000700331360540074000206110ustar00rootroot00000000000000// Copyright 2015 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // The main package for the Prometheus server executable. package main import ( "context" "fmt" "net" "net/http" _ "net/http/pprof" // Comment this line to disable pprof endpoint. "net/url" "os" "os/signal" "path/filepath" "regexp" "runtime" "strings" "sync" "syscall" "time" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" conntrack "github.com/mwitkow/go-conntrack" "github.com/oklog/run" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/prometheus/common/promlog" "github.com/prometheus/common/version" kingpin "gopkg.in/alecthomas/kingpin.v2" "k8s.io/klog" promlogflag "github.com/prometheus/common/promlog/flag" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" sd_config "github.com/prometheus/prometheus/discovery/config" "github.com/prometheus/prometheus/notifier" "github.com/prometheus/prometheus/pkg/relabel" prom_runtime "github.com/prometheus/prometheus/pkg/runtime" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/scrape" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/storage/remote" "github.com/prometheus/prometheus/storage/tsdb" "github.com/prometheus/prometheus/util/strutil" "github.com/prometheus/prometheus/web" ) var ( configSuccess = prometheus.NewGauge(prometheus.GaugeOpts{ Name: "prometheus_config_last_reload_successful", Help: "Whether the last configuration reload attempt was successful.", }) configSuccessTime = prometheus.NewGauge(prometheus.GaugeOpts{ Name: "prometheus_config_last_reload_success_timestamp_seconds", Help: "Timestamp of the last successful configuration reload.", }) defaultRetentionString = "15d" defaultRetentionDuration model.Duration ) func init() { prometheus.MustRegister(version.NewCollector("prometheus")) var err error defaultRetentionDuration, err = model.ParseDuration(defaultRetentionString) if err != nil { panic(err) } } func main() { if os.Getenv("DEBUG") != "" { runtime.SetBlockProfileRate(20) runtime.SetMutexProfileFraction(20) } 
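// The DEBUG block above turns on sampling of goroutine-blocking and
// mutex-contention events via runtime.SetBlockProfileRate and
// runtime.SetMutexProfileFraction, so that the block and mutex profiles
// registered by the net/http/pprof import at the top of this file contain
// data. It is opt-in via the DEBUG environment variable because the sampling
// adds runtime overhead.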
var ( oldFlagRetentionDuration model.Duration newFlagRetentionDuration model.Duration ) cfg := struct { configFile string localStoragePath string notifier notifier.Options notifierTimeout model.Duration forGracePeriod model.Duration outageTolerance model.Duration resendDelay model.Duration web web.Options tsdb tsdb.Options lookbackDelta model.Duration webTimeout model.Duration queryTimeout model.Duration queryConcurrency int queryMaxSamples int RemoteFlushDeadline model.Duration prometheusURL string corsRegexString string promlogConfig promlog.Config }{ notifier: notifier.Options{ Registerer: prometheus.DefaultRegisterer, }, web: web.Options{ Registerer: prometheus.DefaultRegisterer, Gatherer: prometheus.DefaultGatherer, }, promlogConfig: promlog.Config{}, } a := kingpin.New(filepath.Base(os.Args[0]), "The Prometheus monitoring server") a.Version(version.Print("prometheus")) a.HelpFlag.Short('h') a.Flag("config.file", "Prometheus configuration file path."). Default("prometheus.yml").StringVar(&cfg.configFile) a.Flag("web.listen-address", "Address to listen on for UI, API, and telemetry."). Default("0.0.0.0:9090").StringVar(&cfg.web.ListenAddress) a.Flag("web.read-timeout", "Maximum duration before timing out read of the request, and closing idle connections."). Default("5m").SetValue(&cfg.webTimeout) a.Flag("web.max-connections", "Maximum number of simultaneous connections."). Default("512").IntVar(&cfg.web.MaxConnections) a.Flag("web.external-url", "The URL under which Prometheus is externally reachable (for example, if Prometheus is served via a reverse proxy). Used for generating relative and absolute links back to Prometheus itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Prometheus. If omitted, relevant URL components will be derived automatically."). PlaceHolder("").StringVar(&cfg.prometheusURL) a.Flag("web.route-prefix", "Prefix for the internal routes of web endpoints. Defaults to path of --web.external-url."). PlaceHolder("").StringVar(&cfg.web.RoutePrefix) a.Flag("web.user-assets", "Path to static asset directory, available at /user."). PlaceHolder("").StringVar(&cfg.web.UserAssetsPath) a.Flag("web.enable-lifecycle", "Enable shutdown and reload via HTTP request."). Default("false").BoolVar(&cfg.web.EnableLifecycle) a.Flag("web.enable-admin-api", "Enable API endpoints for admin control actions."). Default("false").BoolVar(&cfg.web.EnableAdminAPI) a.Flag("web.console.templates", "Path to the console template directory, available at /consoles."). Default("consoles").StringVar(&cfg.web.ConsoleTemplatesPath) a.Flag("web.console.libraries", "Path to the console library directory."). Default("console_libraries").StringVar(&cfg.web.ConsoleLibrariesPath) a.Flag("web.page-title", "Document title of Prometheus instance."). Default("Prometheus Time Series Collection and Processing Server").StringVar(&cfg.web.PageTitle) a.Flag("web.cors.origin", `Regex for CORS origin. It is fully anchored. Example: 'https?://(domain1|domain2)\.com'`). Default(".*").StringVar(&cfg.corsRegexString) a.Flag("storage.tsdb.path", "Base path for metrics storage."). Default("data/").StringVar(&cfg.localStoragePath) a.Flag("storage.tsdb.min-block-duration", "Minimum duration of a data block before being persisted. For use in testing."). Hidden().Default("2h").SetValue(&cfg.tsdb.MinBlockDuration) a.Flag("storage.tsdb.max-block-duration", "Maximum duration compacted blocks may span. For use in testing. (Defaults to 10% of the retention period.)"). 
Hidden().PlaceHolder("").SetValue(&cfg.tsdb.MaxBlockDuration) a.Flag("storage.tsdb.wal-segment-size", "Size at which to split the tsdb WAL segment files. Example: 100MB"). Hidden().PlaceHolder("").BytesVar(&cfg.tsdb.WALSegmentSize) a.Flag("storage.tsdb.retention", "[DEPRECATED] How long to retain samples in storage. This flag has been deprecated, use \"storage.tsdb.retention.time\" instead."). SetValue(&oldFlagRetentionDuration) a.Flag("storage.tsdb.retention.time", "How long to retain samples in storage. When this flag is set it overrides \"storage.tsdb.retention\". If neither this flag nor \"storage.tsdb.retention\" nor \"storage.tsdb.retention.size\" is set, the retention time defaults to "+defaultRetentionString+". Units Supported: y, w, d, h, m, s, ms."). SetValue(&newFlagRetentionDuration) a.Flag("storage.tsdb.retention.size", "[EXPERIMENTAL] Maximum number of bytes that can be stored for blocks. Units supported: KB, MB, GB, TB, PB. This flag is experimental and can be changed in future releases."). BytesVar(&cfg.tsdb.MaxBytes) a.Flag("storage.tsdb.no-lockfile", "Do not create lockfile in data directory."). Default("false").BoolVar(&cfg.tsdb.NoLockfile) a.Flag("storage.tsdb.allow-overlapping-blocks", "[EXPERIMENTAL] Allow overlapping blocks, which in turn enables vertical compaction and vertical query merge."). Default("false").BoolVar(&cfg.tsdb.AllowOverlappingBlocks) a.Flag("storage.tsdb.wal-compression", "Compress the tsdb WAL."). Default("false").BoolVar(&cfg.tsdb.WALCompression) a.Flag("storage.remote.flush-deadline", "How long to wait flushing sample on shutdown or config reload."). Default("1m").PlaceHolder("").SetValue(&cfg.RemoteFlushDeadline) a.Flag("storage.remote.read-sample-limit", "Maximum overall number of samples to return via the remote read interface, in a single query. 0 means no limit. This limit is ignored for streamed response types."). Default("5e7").IntVar(&cfg.web.RemoteReadSampleLimit) a.Flag("storage.remote.read-concurrent-limit", "Maximum number of concurrent remote read calls. 0 means no limit."). Default("10").IntVar(&cfg.web.RemoteReadConcurrencyLimit) a.Flag("storage.remote.read-max-bytes-in-frame", "Maximum number of bytes in a single frame for streaming remote read response types before marshalling. Note that client might have limit on frame size as well. 1MB as recommended by protobuf by default."). Default("1048576").IntVar(&cfg.web.RemoteReadBytesInFrame) a.Flag("rules.alert.for-outage-tolerance", "Max time to tolerate prometheus outage for restoring \"for\" state of alert."). Default("1h").SetValue(&cfg.outageTolerance) a.Flag("rules.alert.for-grace-period", "Minimum duration between alert and restored \"for\" state. This is maintained only for alerts with configured \"for\" time greater than grace period."). Default("10m").SetValue(&cfg.forGracePeriod) a.Flag("rules.alert.resend-delay", "Minimum amount of time to wait before resending an alert to Alertmanager."). Default("1m").SetValue(&cfg.resendDelay) a.Flag("alertmanager.notification-queue-capacity", "The capacity of the queue for pending Alertmanager notifications."). Default("10000").IntVar(&cfg.notifier.QueueCapacity) a.Flag("alertmanager.timeout", "Timeout for sending alerts to Alertmanager."). Default("10s").SetValue(&cfg.notifierTimeout) a.Flag("query.lookback-delta", "The maximum lookback duration for retrieving metrics during expression evaluations."). Default("5m").SetValue(&cfg.lookbackDelta) a.Flag("query.timeout", "Maximum time a query may take before being aborted."). 
Default("2m").SetValue(&cfg.queryTimeout) a.Flag("query.max-concurrency", "Maximum number of queries executed concurrently."). Default("20").IntVar(&cfg.queryConcurrency) a.Flag("query.max-samples", "Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return."). Default("50000000").IntVar(&cfg.queryMaxSamples) promlogflag.AddFlags(a, &cfg.promlogConfig) _, err := a.Parse(os.Args[1:]) if err != nil { fmt.Fprintln(os.Stderr, errors.Wrapf(err, "Error parsing commandline arguments")) a.Usage(os.Args[1:]) os.Exit(2) } logger := promlog.New(&cfg.promlogConfig) cfg.web.ExternalURL, err = computeExternalURL(cfg.prometheusURL, cfg.web.ListenAddress) if err != nil { fmt.Fprintln(os.Stderr, errors.Wrapf(err, "parse external URL %q", cfg.prometheusURL)) os.Exit(2) } cfg.web.CORSOrigin, err = compileCORSRegexString(cfg.corsRegexString) if err != nil { fmt.Fprintln(os.Stderr, errors.Wrapf(err, "could not compile CORS regex string %q", cfg.corsRegexString)) os.Exit(2) } cfg.web.ReadTimeout = time.Duration(cfg.webTimeout) // Default -web.route-prefix to path of -web.external-url. if cfg.web.RoutePrefix == "" { cfg.web.RoutePrefix = cfg.web.ExternalURL.Path } // RoutePrefix must always be at least '/'. cfg.web.RoutePrefix = "/" + strings.Trim(cfg.web.RoutePrefix, "/") { // Time retention settings. if oldFlagRetentionDuration != 0 { level.Warn(logger).Log("deprecation_notice", "'storage.tsdb.retention' flag is deprecated use 'storage.tsdb.retention.time' instead.") cfg.tsdb.RetentionDuration = oldFlagRetentionDuration } // When the new flag is set it takes precedence. if newFlagRetentionDuration != 0 { cfg.tsdb.RetentionDuration = newFlagRetentionDuration } if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 { cfg.tsdb.RetentionDuration = defaultRetentionDuration level.Info(logger).Log("msg", "no time or size retention was set so using the default time retention", "duration", defaultRetentionDuration) } // Check for overflows. This limits our max retention to 100y. if cfg.tsdb.RetentionDuration < 0 { y, err := model.ParseDuration("100y") if err != nil { panic(err) } cfg.tsdb.RetentionDuration = y level.Warn(logger).Log("msg", "time retention value is too high. Limiting to: "+y.String()) } } { // Max block size settings. if cfg.tsdb.MaxBlockDuration == 0 { maxBlockDuration, err := model.ParseDuration("31d") if err != nil { panic(err) } // When the time retention is set and not too big use to define the max block duration. if cfg.tsdb.RetentionDuration != 0 && cfg.tsdb.RetentionDuration/10 < maxBlockDuration { maxBlockDuration = cfg.tsdb.RetentionDuration / 10 } cfg.tsdb.MaxBlockDuration = maxBlockDuration } } promql.LookbackDelta = time.Duration(cfg.lookbackDelta) promql.SetDefaultEvaluationInterval(time.Duration(config.DefaultGlobalConfig.EvaluationInterval)) // Above level 6, the k8s client would log bearer tokens in clear-text. 
klog.ClampLevel(6) klog.SetLogger(log.With(logger, "component", "k8s_client_runtime")) level.Info(logger).Log("msg", "Starting Prometheus", "version", version.Info()) level.Info(logger).Log("build_context", version.BuildContext()) level.Info(logger).Log("host_details", prom_runtime.Uname()) level.Info(logger).Log("fd_limits", prom_runtime.FdLimits()) level.Info(logger).Log("vm_limits", prom_runtime.VmLimits()) var ( localStorage = &tsdb.ReadyStorage{} remoteStorage = remote.NewStorage(log.With(logger, "component", "remote"), prometheus.DefaultRegisterer, localStorage.StartTime, cfg.localStoragePath, time.Duration(cfg.RemoteFlushDeadline)) fanoutStorage = storage.NewFanout(logger, localStorage, remoteStorage) ) var ( ctxWeb, cancelWeb = context.WithCancel(context.Background()) ctxRule = context.Background() notifierManager = notifier.NewManager(&cfg.notifier, log.With(logger, "component", "notifier")) ctxScrape, cancelScrape = context.WithCancel(context.Background()) discoveryManagerScrape = discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), discovery.Name("scrape")) ctxNotify, cancelNotify = context.WithCancel(context.Background()) discoveryManagerNotify = discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), discovery.Name("notify")) scrapeManager = scrape.NewManager(log.With(logger, "component", "scrape manager"), fanoutStorage) opts = promql.EngineOpts{ Logger: log.With(logger, "component", "query engine"), Reg: prometheus.DefaultRegisterer, MaxConcurrent: cfg.queryConcurrency, MaxSamples: cfg.queryMaxSamples, Timeout: time.Duration(cfg.queryTimeout), ActiveQueryTracker: promql.NewActiveQueryTracker(cfg.localStoragePath, cfg.queryConcurrency, log.With(logger, "component", "activeQueryTracker")), } queryEngine = promql.NewEngine(opts) ruleManager = rules.NewManager(&rules.ManagerOptions{ Appendable: fanoutStorage, TSDB: localStorage, QueryFunc: rules.EngineQueryFunc(queryEngine, fanoutStorage), NotifyFunc: sendAlerts(notifierManager, cfg.web.ExternalURL.String()), Context: ctxRule, ExternalURL: cfg.web.ExternalURL, Registerer: prometheus.DefaultRegisterer, Logger: log.With(logger, "component", "rule manager"), OutageTolerance: time.Duration(cfg.outageTolerance), ForGracePeriod: time.Duration(cfg.forGracePeriod), ResendDelay: time.Duration(cfg.resendDelay), }) ) cfg.web.Context = ctxWeb cfg.web.TSDB = localStorage.Get cfg.web.Storage = fanoutStorage cfg.web.QueryEngine = queryEngine cfg.web.ScrapeManager = scrapeManager cfg.web.RuleManager = ruleManager cfg.web.Notifier = notifierManager cfg.web.TSDBCfg = cfg.tsdb cfg.web.Version = &web.PrometheusVersion{ Version: version.Version, Revision: version.Revision, Branch: version.Branch, BuildUser: version.BuildUser, BuildDate: version.BuildDate, GoVersion: version.GoVersion, } cfg.web.Flags = map[string]string{} // Exclude kingpin default flags to expose only Prometheus ones. boilerplateFlags := kingpin.New("", "").Version("") for _, f := range a.Model().Flags { if boilerplateFlags.GetFlag(f.Name) != nil { continue } cfg.web.Flags[f.Name] = f.Value.String() } // Depends on cfg.web.ScrapeManager so needs to be after cfg.web.ScrapeManager = scrapeManager. webHandler := web.New(log.With(logger, "component", "web"), &cfg.web) // Monitor outgoing connections on default transport with conntrack. 
http.DefaultTransport.(*http.Transport).DialContext = conntrack.NewDialContextFunc( conntrack.DialWithTracing(), ) reloaders := []func(cfg *config.Config) error{ remoteStorage.ApplyConfig, webHandler.ApplyConfig, // The Scrape and notifier managers need to reload before the Discovery manager as // they need to read the most updated config when receiving the new targets list. scrapeManager.ApplyConfig, func(cfg *config.Config) error { c := make(map[string]sd_config.ServiceDiscoveryConfig) for _, v := range cfg.ScrapeConfigs { c[v.JobName] = v.ServiceDiscoveryConfig } return discoveryManagerScrape.ApplyConfig(c) }, notifierManager.ApplyConfig, func(cfg *config.Config) error { c := make(map[string]sd_config.ServiceDiscoveryConfig) for k, v := range cfg.AlertingConfig.AlertmanagerConfigs.ToMap() { c[k] = v.ServiceDiscoveryConfig } return discoveryManagerNotify.ApplyConfig(c) }, func(cfg *config.Config) error { // Get all rule files matching the configuration paths. var files []string for _, pat := range cfg.RuleFiles { fs, err := filepath.Glob(pat) if err != nil { // The only error can be a bad pattern. return errors.Wrapf(err, "error retrieving rule files for %s", pat) } files = append(files, fs...) } return ruleManager.Update( time.Duration(cfg.GlobalConfig.EvaluationInterval), files, cfg.GlobalConfig.ExternalLabels, ) }, } prometheus.MustRegister(configSuccess) prometheus.MustRegister(configSuccessTime) // Start all components while we wait for TSDB to open but only load // initial config and mark ourselves as ready after it completed. dbOpen := make(chan struct{}) // sync.Once is used to make sure we can close the channel at different execution stages(SIGTERM or when the config is loaded). type closeOnce struct { C chan struct{} once sync.Once Close func() } // Wait until the server is ready to handle reloading. reloadReady := &closeOnce{ C: make(chan struct{}), } reloadReady.Close = func() { reloadReady.once.Do(func() { close(reloadReady.C) }) } var g run.Group { // Termination handler. term := make(chan os.Signal, 1) signal.Notify(term, os.Interrupt, syscall.SIGTERM) cancel := make(chan struct{}) g.Add( func() error { // Don't forget to release the reloadReady channel so that waiting blocks can exit normally. select { case <-term: level.Warn(logger).Log("msg", "Received SIGTERM, exiting gracefully...") reloadReady.Close() case <-webHandler.Quit(): level.Warn(logger).Log("msg", "Received termination request via web service, exiting gracefully...") case <-cancel: reloadReady.Close() } return nil }, func(err error) { close(cancel) }, ) } { // Scrape discovery manager. g.Add( func() error { err := discoveryManagerScrape.Run() level.Info(logger).Log("msg", "Scrape discovery manager stopped") return err }, func(err error) { level.Info(logger).Log("msg", "Stopping scrape discovery manager...") cancelScrape() }, ) } { // Notify discovery manager. g.Add( func() error { err := discoveryManagerNotify.Run() level.Info(logger).Log("msg", "Notify discovery manager stopped") return err }, func(err error) { level.Info(logger).Log("msg", "Stopping notify discovery manager...") cancelNotify() }, ) } { // Scrape manager. g.Add( func() error { // When the scrape manager receives a new targets list // it needs to read a valid config for each job. // It depends on the config being in sync with the discovery manager so // we wait until the config is fully loaded. 
<-reloadReady.C err := scrapeManager.Run(discoveryManagerScrape.SyncCh()) level.Info(logger).Log("msg", "Scrape manager stopped") return err }, func(err error) { // Scrape manager needs to be stopped before closing the local TSDB // so that it doesn't try to write samples to a closed storage. level.Info(logger).Log("msg", "Stopping scrape manager...") scrapeManager.Stop() }, ) } { // Reload handler. // Make sure that sighup handler is registered with a redirect to the channel before the potentially // long and synchronous tsdb init. hup := make(chan os.Signal, 1) signal.Notify(hup, syscall.SIGHUP) cancel := make(chan struct{}) g.Add( func() error { <-reloadReady.C for { select { case <-hup: if err := reloadConfig(cfg.configFile, logger, reloaders...); err != nil { level.Error(logger).Log("msg", "Error reloading config", "err", err) } case rc := <-webHandler.Reload(): if err := reloadConfig(cfg.configFile, logger, reloaders...); err != nil { level.Error(logger).Log("msg", "Error reloading config", "err", err) rc <- err } else { rc <- nil } case <-cancel: return nil } } }, func(err error) { // Wait for any in-progress reloads to complete to avoid // reloading things after they have been shutdown. cancel <- struct{}{} }, ) } { // Initial configuration loading. cancel := make(chan struct{}) g.Add( func() error { select { case <-dbOpen: break // In case a shutdown is initiated before the dbOpen is released case <-cancel: reloadReady.Close() return nil } if err := reloadConfig(cfg.configFile, logger, reloaders...); err != nil { return errors.Wrapf(err, "error loading config from %q", cfg.configFile) } reloadReady.Close() webHandler.Ready() level.Info(logger).Log("msg", "Server is ready to receive web requests.") <-cancel return nil }, func(err error) { close(cancel) }, ) } { // Rule manager. // TODO(krasi) refactor ruleManager.Run() to be blocking to avoid using an extra blocking channel. cancel := make(chan struct{}) g.Add( func() error { <-reloadReady.C ruleManager.Run() <-cancel return nil }, func(err error) { ruleManager.Stop() close(cancel) }, ) } { // TSDB. cancel := make(chan struct{}) g.Add( func() error { level.Info(logger).Log("msg", "Starting TSDB ...") if cfg.tsdb.WALSegmentSize != 0 { if cfg.tsdb.WALSegmentSize < 10*1024*1024 || cfg.tsdb.WALSegmentSize > 256*1024*1024 { return errors.New("flag 'storage.tsdb.wal-segment-size' must be set between 10MB and 256MB") } } db, err := tsdb.Open( cfg.localStoragePath, log.With(logger, "component", "tsdb"), prometheus.DefaultRegisterer, &cfg.tsdb, ) if err != nil { return errors.Wrapf(err, "opening storage failed") } level.Info(logger).Log("fs_type", prom_runtime.Statfs(cfg.localStoragePath)) level.Info(logger).Log("msg", "TSDB started") level.Debug(logger).Log("msg", "TSDB options", "MinBlockDuration", cfg.tsdb.MinBlockDuration, "MaxBlockDuration", cfg.tsdb.MaxBlockDuration, "MaxBytes", cfg.tsdb.MaxBytes, "NoLockfile", cfg.tsdb.NoLockfile, "RetentionDuration", cfg.tsdb.RetentionDuration, "WALSegmentSize", cfg.tsdb.WALSegmentSize, "AllowOverlappingBlocks", cfg.tsdb.AllowOverlappingBlocks, "WALCompression", cfg.tsdb.WALCompression, ) startTimeMargin := int64(2 * time.Duration(cfg.tsdb.MinBlockDuration).Seconds() * 1000) localStorage.Set(db, startTimeMargin) close(dbOpen) <-cancel return nil }, func(err error) { if err := fanoutStorage.Close(); err != nil { level.Error(logger).Log("msg", "Error stopping storage", "err", err) } close(cancel) }, ) } { // Web handler. 
g.Add( func() error { if err := webHandler.Run(ctxWeb); err != nil { return errors.Wrapf(err, "error starting web server") } return nil }, func(err error) { cancelWeb() }, ) } { // Notifier. // Calling notifier.Stop() before ruleManager.Stop() will cause a panic if the ruleManager isn't running, // so keep this interrupt after the ruleManager.Stop(). g.Add( func() error { // When the notifier manager receives a new targets list // it needs to read a valid config for each job. // It depends on the config being in sync with the discovery manager // so we wait until the config is fully loaded. <-reloadReady.C notifierManager.Run(discoveryManagerNotify.SyncCh()) level.Info(logger).Log("msg", "Notifier manager stopped") return nil }, func(err error) { notifierManager.Stop() }, ) } if err := g.Run(); err != nil { level.Error(logger).Log("err", err) os.Exit(1) } level.Info(logger).Log("msg", "See you next time!") } func reloadConfig(filename string, logger log.Logger, rls ...func(*config.Config) error) (err error) { level.Info(logger).Log("msg", "Loading configuration file", "filename", filename) defer func() { if err == nil { configSuccess.Set(1) configSuccessTime.SetToCurrentTime() } else { configSuccess.Set(0) } }() conf, err := config.LoadFile(filename) if err != nil { return errors.Wrapf(err, "couldn't load configuration (--config.file=%q)", filename) } failed := false for _, rl := range rls { if err := rl(conf); err != nil { level.Error(logger).Log("msg", "Failed to apply configuration", "err", err) failed = true } } if failed { return errors.Errorf("one or more errors occurred while applying the new configuration (--config.file=%q)", filename) } promql.SetDefaultEvaluationInterval(time.Duration(conf.GlobalConfig.EvaluationInterval)) level.Info(logger).Log("msg", "Completed loading of configuration file", "filename", filename) return nil } func startsOrEndsWithQuote(s string) bool { return strings.HasPrefix(s, "\"") || strings.HasPrefix(s, "'") || strings.HasSuffix(s, "\"") || strings.HasSuffix(s, "'") } // compileCORSRegexString compiles given string and adds anchors func compileCORSRegexString(s string) (*regexp.Regexp, error) { r, err := relabel.NewRegexp(s) if err != nil { return nil, err } return r.Regexp, nil } // computeExternalURL computes a sanitized external URL from a raw input. It infers unset // URL parts from the OS and the given listen address. func computeExternalURL(u, listenAddr string) (*url.URL, error) { if u == "" { hostname, err := os.Hostname() if err != nil { return nil, err } _, port, err := net.SplitHostPort(listenAddr) if err != nil { return nil, err } u = fmt.Sprintf("http://%s:%s/", hostname, port) } if startsOrEndsWithQuote(u) { return nil, errors.New("URL must not begin or end with quotes") } eu, err := url.Parse(u) if err != nil { return nil, err } ppref := strings.TrimRight(eu.Path, "/") if ppref != "" && !strings.HasPrefix(ppref, "/") { ppref = "/" + ppref } eu.Path = ppref return eu, nil } type sender interface { Send(alerts ...*notifier.Alert) } // sendAlerts implements the rules.NotifyFunc for a Notifier. 
func sendAlerts(s sender, externalURL string) rules.NotifyFunc { return func(ctx context.Context, expr string, alerts ...*rules.Alert) { var res []*notifier.Alert for _, alert := range alerts { a := &notifier.Alert{ StartsAt: alert.FiredAt, Labels: alert.Labels, Annotations: alert.Annotations, GeneratorURL: externalURL + strutil.TableLinkForExpression(expr), } if !alert.ResolvedAt.IsZero() { a.EndsAt = alert.ResolvedAt } else { a.EndsAt = alert.ValidUntil } res = append(res, a) } if len(alerts) > 0 { s.Send(res...) } } } prometheus-2.15.2+ds/cmd/prometheus/main_test.go000066400000000000000000000163401360540074000216510ustar00rootroot00000000000000// Copyright 2017 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package main import ( "context" "flag" "fmt" "net/http" "os" "os/exec" "path/filepath" "syscall" "testing" "time" "github.com/prometheus/prometheus/notifier" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/util/testutil" ) var promPath string var promConfig = filepath.Join("..", "..", "documentation", "examples", "prometheus.yml") var promData = filepath.Join(os.TempDir(), "data") func TestMain(m *testing.M) { flag.Parse() if testing.Short() { os.Exit(m.Run()) } // On linux with a global proxy the tests will fail as the Go client (http, grpc) tries to connect through the proxy. os.Setenv("no_proxy", "localhost,127.0.0.1,0.0.0.0,:") var err error promPath, err = os.Getwd() if err != nil { fmt.Printf("can't get current dir :%s \n", err) os.Exit(1) } promPath = filepath.Join(promPath, "prometheus") build := exec.Command("go", "build", "-o", promPath) output, err := build.CombinedOutput() if err != nil { fmt.Printf("compilation error :%s \n", output) os.Exit(1) } exitCode := m.Run() os.Remove(promPath) os.RemoveAll(promData) os.Exit(exitCode) } // As soon as Prometheus starts responding to HTTP requests, it should be able to accept Interrupt signals for a graceful shutdown. func TestStartupInterrupt(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } prom := exec.Command(promPath, "--config.file="+promConfig, "--storage.tsdb.path="+promData) err := prom.Start() if err != nil { t.Errorf("execution error: %v", err) return } done := make(chan error) go func() { done <- prom.Wait() }() var startedOk bool var stoppedErr error Loop: for x := 0; x < 10; x++ { // error=nil means Prometheus has started, so we can send the interrupt signal and wait for the graceful shutdown.
if _, err := http.Get("http://localhost:9090/graph"); err == nil { startedOk = true prom.Process.Signal(os.Interrupt) select { case stoppedErr = <-done: break Loop case <-time.After(10 * time.Second): } break Loop } time.Sleep(500 * time.Millisecond) } if !startedOk { t.Errorf("prometheus didn't start in the specified timeout") return } if err := prom.Process.Kill(); err == nil { t.Errorf("prometheus didn't shutdown gracefully after sending the Interrupt signal") } else if stoppedErr != nil && stoppedErr.Error() != "signal: interrupt" { // TODO - find a better way to detect when the process didn't exit as expected! t.Errorf("prometheus exited with an unexpected error:%v", stoppedErr) } } func TestComputeExternalURL(t *testing.T) { tests := []struct { input string valid bool }{ { input: "", valid: true, }, { input: "http://proxy.com/prometheus", valid: true, }, { input: "'https://url/prometheus'", valid: false, }, { input: "'relative/path/with/quotes'", valid: false, }, { input: "http://alertmanager.company.com", valid: true, }, { input: "https://double--dash.de", valid: true, }, { input: "'http://starts/with/quote", valid: false, }, { input: "ends/with/quote\"", valid: false, }, } for _, test := range tests { _, err := computeExternalURL(test.input, "0.0.0.0:9090") if test.valid { testutil.Ok(t, err) } else { testutil.NotOk(t, err, "input=%q", test.input) } } } // Let's provide an invalid configuration file and verify the exit status indicates the error. func TestFailedStartupExitCode(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } fakeInputFile := "fake-input-file" expectedExitStatus := 1 prom := exec.Command(promPath, "--config.file="+fakeInputFile) err := prom.Run() testutil.NotOk(t, err) if exitError, ok := err.(*exec.ExitError); ok { status := exitError.Sys().(syscall.WaitStatus) testutil.Equals(t, expectedExitStatus, status.ExitStatus()) } else { t.Errorf("unable to retrieve the exit status for prometheus: %v", err) } } type senderFunc func(alerts ...*notifier.Alert) func (s senderFunc) Send(alerts ...*notifier.Alert) { s(alerts...) } func TestSendAlerts(t *testing.T) { testCases := []struct { in []*rules.Alert exp []*notifier.Alert }{ { in: []*rules.Alert{ { Labels: []labels.Label{{Name: "l1", Value: "v1"}}, Annotations: []labels.Label{{Name: "a2", Value: "v2"}}, ActiveAt: time.Unix(1, 0), FiredAt: time.Unix(2, 0), ValidUntil: time.Unix(3, 0), }, }, exp: []*notifier.Alert{ { Labels: []labels.Label{{Name: "l1", Value: "v1"}}, Annotations: []labels.Label{{Name: "a2", Value: "v2"}}, StartsAt: time.Unix(2, 0), EndsAt: time.Unix(3, 0), GeneratorURL: "http://localhost:9090/graph?g0.expr=up&g0.tab=1", }, }, }, { in: []*rules.Alert{ { Labels: []labels.Label{{Name: "l1", Value: "v1"}}, Annotations: []labels.Label{{Name: "a2", Value: "v2"}}, ActiveAt: time.Unix(1, 0), FiredAt: time.Unix(2, 0), ResolvedAt: time.Unix(4, 0), }, }, exp: []*notifier.Alert{ { Labels: []labels.Label{{Name: "l1", Value: "v1"}}, Annotations: []labels.Label{{Name: "a2", Value: "v2"}}, StartsAt: time.Unix(2, 0), EndsAt: time.Unix(4, 0), GeneratorURL: "http://localhost:9090/graph?g0.expr=up&g0.tab=1", }, }, }, { in: []*rules.Alert{}, }, } for i, tc := range testCases { tc := tc t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { senderFunc := senderFunc(func(alerts ...*notifier.Alert) { if len(tc.in) == 0 { t.Fatalf("sender called with 0 alert") } testutil.Equals(t, tc.exp, alerts) }) sendAlerts(senderFunc, "http://localhost:9090")(context.TODO(), "up", tc.in...) 
}) } } func TestWALSegmentSizeBounds(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } for size, expectedExitStatus := range map[string]int{"9MB": 1, "257MB": 1, "10": 2, "1GB": 1, "12MB": 0} { prom := exec.Command(promPath, "--storage.tsdb.wal-segment-size="+size, "--config.file="+promConfig) err := prom.Start() testutil.Ok(t, err) if expectedExitStatus == 0 { done := make(chan error, 1) go func() { done <- prom.Wait() }() select { case err := <-done: t.Errorf("prometheus should be still running: %v", err) case <-time.After(5 * time.Second): prom.Process.Signal(os.Interrupt) } continue } err = prom.Wait() testutil.NotOk(t, err) if exitError, ok := err.(*exec.ExitError); ok { status := exitError.Sys().(syscall.WaitStatus) testutil.Equals(t, expectedExitStatus, status.ExitStatus()) } else { t.Errorf("unable to retrieve the exit status for prometheus: %v", err) } } } prometheus-2.15.2+ds/cmd/promtool/000077500000000000000000000000001360540074000170135ustar00rootroot00000000000000prometheus-2.15.2+ds/cmd/promtool/archive.go000066400000000000000000000031611360540074000207640ustar00rootroot00000000000000// Copyright 2015 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package main import ( "archive/tar" "compress/gzip" "os" "github.com/pkg/errors" ) const filePerm = 0644 type tarGzFileWriter struct { tarWriter *tar.Writer gzWriter *gzip.Writer file *os.File } func newTarGzFileWriter(archiveName string) (*tarGzFileWriter, error) { file, err := os.Create(archiveName) if err != nil { return nil, errors.Wrapf(err, "error creating archive %q", archiveName) } gzw := gzip.NewWriter(file) tw := tar.NewWriter(gzw) return &tarGzFileWriter{ tarWriter: tw, gzWriter: gzw, file: file, }, nil } func (w *tarGzFileWriter) close() error { if err := w.tarWriter.Close(); err != nil { return err } if err := w.gzWriter.Close(); err != nil { return err } return w.file.Close() } func (w *tarGzFileWriter) write(filename string, b []byte) error { header := &tar.Header{ Name: filename, Mode: filePerm, Size: int64(len(b)), } if err := w.tarWriter.WriteHeader(header); err != nil { return err } if _, err := w.tarWriter.Write(b); err != nil { return err } return nil } prometheus-2.15.2+ds/cmd/promtool/debug.go000066400000000000000000000036401360540074000204330ustar00rootroot00000000000000// Copyright 2015 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
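// debugWrite below collects a set of HTTP endpoints from a running Prometheus
// server and stores the (optionally post-processed) responses in a tar.gz
// archive via the tarGzFileWriter defined in archive.go; it is the collection
// backend used by promtool's debug commands.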
package main import ( "fmt" "io/ioutil" "net/http" "github.com/pkg/errors" ) type debugWriterConfig struct { serverURL string tarballName string endPointGroups []endpointsGroup } func debugWrite(cfg debugWriterConfig) error { archiver, err := newTarGzFileWriter(cfg.tarballName) if err != nil { return errors.Wrap(err, "error creating a new archiver") } for _, endPointGroup := range cfg.endPointGroups { for url, filename := range endPointGroup.urlToFilename { url := cfg.serverURL + url fmt.Println("collecting:", url) res, err := http.Get(url) if err != nil { return errors.Wrap(err, "error executing HTTP request") } body, err := ioutil.ReadAll(res.Body) res.Body.Close() if err != nil { return errors.Wrap(err, "error reading the response body") } if endPointGroup.postProcess != nil { body, err = endPointGroup.postProcess(body) if err != nil { return errors.Wrap(err, "error post-processing HTTP response body") } } if err := archiver.write(filename, body); err != nil { return errors.Wrap(err, "error writing into the archive") } } } if err := archiver.close(); err != nil { return errors.Wrap(err, "error closing archive writer") } fmt.Printf("Compiling debug information complete, all files written in %q.\n", cfg.tarballName) return nil } prometheus-2.15.2+ds/cmd/promtool/main.go000066400000000000000000000450741360540074000203000ustar00rootroot00000000000000// Copyright 2015 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
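// Example invocations of the promtool binary built from this package (the
// server URL and file names below are placeholders):
//
//	promtool check config prometheus.yml
//	promtool check rules rules.yml
//	promtool query instant http://localhost:9090 'up'
//	promtool debug all http://localhost:9090
//	promtool test rules tests.yml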
package main import ( "bytes" "context" "encoding/json" "fmt" "math" "net/http" "net/url" "os" "path/filepath" "reflect" "strconv" "strings" "time" "github.com/google/pprof/profile" "github.com/pkg/errors" "github.com/prometheus/client_golang/api" v1 "github.com/prometheus/client_golang/api/prometheus/v1" "github.com/prometheus/client_golang/prometheus/promhttp" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/common/version" kingpin "gopkg.in/alecthomas/kingpin.v2" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/pkg/rulefmt" "github.com/prometheus/prometheus/util/promlint" ) func main() { app := kingpin.New(filepath.Base(os.Args[0]), "Tooling for the Prometheus monitoring system.") app.Version(version.Print("promtool")) app.HelpFlag.Short('h') checkCmd := app.Command("check", "Check the resources for validity.") checkConfigCmd := checkCmd.Command("config", "Check if the config files are valid or not.") configFiles := checkConfigCmd.Arg( "config-files", "The config files to check.", ).Required().ExistingFiles() checkRulesCmd := checkCmd.Command("rules", "Check if the rule files are valid or not.") ruleFiles := checkRulesCmd.Arg( "rule-files", "The rule files to check.", ).Required().ExistingFiles() checkMetricsCmd := checkCmd.Command("metrics", checkMetricsUsage) queryCmd := app.Command("query", "Run query against a Prometheus server.") queryCmdFmt := queryCmd.Flag("format", "Output format of the query.").Short('o').Default("promql").Enum("promql", "json") queryInstantCmd := queryCmd.Command("instant", "Run instant query.") queryServer := queryInstantCmd.Arg("server", "Prometheus server to query.").Required().String() queryExpr := queryInstantCmd.Arg("expr", "PromQL query expression.").Required().String() queryRangeCmd := queryCmd.Command("range", "Run range query.") queryRangeServer := queryRangeCmd.Arg("server", "Prometheus server to query.").Required().String() queryRangeExpr := queryRangeCmd.Arg("expr", "PromQL query expression.").Required().String() queryRangeHeaders := queryRangeCmd.Flag("header", "Extra headers to send to server.").StringMap() queryRangeBegin := queryRangeCmd.Flag("start", "Query range start time (RFC3339 or Unix timestamp).").String() queryRangeEnd := queryRangeCmd.Flag("end", "Query range end time (RFC3339 or Unix timestamp).").String() queryRangeStep := queryRangeCmd.Flag("step", "Query step size (duration).").Duration() querySeriesCmd := queryCmd.Command("series", "Run series query.") querySeriesServer := querySeriesCmd.Arg("server", "Prometheus server to query.").Required().URL() querySeriesMatch := querySeriesCmd.Flag("match", "Series selector. 
Can be specified multiple times.").Required().Strings() querySeriesBegin := querySeriesCmd.Flag("start", "Start time (RFC3339 or Unix timestamp).").String() querySeriesEnd := querySeriesCmd.Flag("end", "End time (RFC3339 or Unix timestamp).").String() debugCmd := app.Command("debug", "Fetch debug information.") debugPprofCmd := debugCmd.Command("pprof", "Fetch profiling debug information.") debugPprofServer := debugPprofCmd.Arg("server", "Prometheus server to get pprof files from.").Required().String() debugMetricsCmd := debugCmd.Command("metrics", "Fetch metrics debug information.") debugMetricsServer := debugMetricsCmd.Arg("server", "Prometheus server to get metrics from.").Required().String() debugAllCmd := debugCmd.Command("all", "Fetch all debug information.") debugAllServer := debugAllCmd.Arg("server", "Prometheus server to get all debug information from.").Required().String() queryLabelsCmd := queryCmd.Command("labels", "Run labels query.") queryLabelsServer := queryLabelsCmd.Arg("server", "Prometheus server to query.").Required().URL() queryLabelsName := queryLabelsCmd.Arg("name", "Label name to provide label values for.").Required().String() testCmd := app.Command("test", "Unit testing.") testRulesCmd := testCmd.Command("rules", "Unit tests for rules.") testRulesFiles := testRulesCmd.Arg( "test-rule-file", "The unit test file.", ).Required().ExistingFiles() parsedCmd := kingpin.MustParse(app.Parse(os.Args[1:])) var p printer switch *queryCmdFmt { case "json": p = &jsonPrinter{} case "promql": p = &promqlPrinter{} } switch parsedCmd { case checkConfigCmd.FullCommand(): os.Exit(CheckConfig(*configFiles...)) case checkRulesCmd.FullCommand(): os.Exit(CheckRules(*ruleFiles...)) case checkMetricsCmd.FullCommand(): os.Exit(CheckMetrics()) case queryInstantCmd.FullCommand(): os.Exit(QueryInstant(*queryServer, *queryExpr, p)) case queryRangeCmd.FullCommand(): os.Exit(QueryRange(*queryRangeServer, *queryRangeHeaders, *queryRangeExpr, *queryRangeBegin, *queryRangeEnd, *queryRangeStep, p)) case querySeriesCmd.FullCommand(): os.Exit(QuerySeries(*querySeriesServer, *querySeriesMatch, *querySeriesBegin, *querySeriesEnd, p)) case debugPprofCmd.FullCommand(): os.Exit(debugPprof(*debugPprofServer)) case debugMetricsCmd.FullCommand(): os.Exit(debugMetrics(*debugMetricsServer)) case debugAllCmd.FullCommand(): os.Exit(debugAll(*debugAllServer)) case queryLabelsCmd.FullCommand(): os.Exit(QueryLabels(*queryLabelsServer, *queryLabelsName, p)) case testRulesCmd.FullCommand(): os.Exit(RulesUnitTest(*testRulesFiles...)) } } // CheckConfig validates configuration files. func CheckConfig(files ...string) int { failed := false for _, f := range files { ruleFiles, err := checkConfig(f) if err != nil { fmt.Fprintln(os.Stderr, " FAILED:", err) failed = true } else { fmt.Printf(" SUCCESS: %d rule files found\n", len(ruleFiles)) } fmt.Println() for _, rf := range ruleFiles { if n, errs := checkRules(rf); len(errs) > 0 { fmt.Fprintln(os.Stderr, " FAILED:") for _, err := range errs { fmt.Fprintln(os.Stderr, " ", err) } failed = true } else { fmt.Printf(" SUCCESS: %d rules found\n", n) } fmt.Println() } } if failed { return 1 } return 0 } func checkFileExists(fn string) error { // Nothing set, nothing to error on. 
if fn == "" { return nil } _, err := os.Stat(fn) return err } func checkConfig(filename string) ([]string, error) { fmt.Println("Checking", filename) cfg, err := config.LoadFile(filename) if err != nil { return nil, err } var ruleFiles []string for _, rf := range cfg.RuleFiles { rfs, err := filepath.Glob(rf) if err != nil { return nil, err } // If an explicit file was given, error if it is not accessible. if !strings.Contains(rf, "*") { if len(rfs) == 0 { return nil, errors.Errorf("%q does not point to an existing file", rf) } if err := checkFileExists(rfs[0]); err != nil { return nil, errors.Wrapf(err, "error checking rule file %q", rfs[0]) } } ruleFiles = append(ruleFiles, rfs...) } for _, scfg := range cfg.ScrapeConfigs { if err := checkFileExists(scfg.HTTPClientConfig.BearerTokenFile); err != nil { return nil, errors.Wrapf(err, "error checking bearer token file %q", scfg.HTTPClientConfig.BearerTokenFile) } if err := checkTLSConfig(scfg.HTTPClientConfig.TLSConfig); err != nil { return nil, err } for _, kd := range scfg.ServiceDiscoveryConfig.KubernetesSDConfigs { if err := checkTLSConfig(kd.HTTPClientConfig.TLSConfig); err != nil { return nil, err } } for _, filesd := range scfg.ServiceDiscoveryConfig.FileSDConfigs { for _, file := range filesd.Files { files, err := filepath.Glob(file) if err != nil { return nil, err } if len(files) != 0 { // There was at least one match for the glob and we can assume checkFileExists // for all matches would pass, we can continue the loop. continue } fmt.Printf(" WARNING: file %q for file_sd in scrape job %q does not exist\n", file, scfg.JobName) } } } return ruleFiles, nil } func checkTLSConfig(tlsConfig config_util.TLSConfig) error { if err := checkFileExists(tlsConfig.CertFile); err != nil { return errors.Wrapf(err, "error checking client cert file %q", tlsConfig.CertFile) } if err := checkFileExists(tlsConfig.KeyFile); err != nil { return errors.Wrapf(err, "error checking client key file %q", tlsConfig.KeyFile) } if len(tlsConfig.CertFile) > 0 && len(tlsConfig.KeyFile) == 0 { return errors.Errorf("client cert file %q specified without client key file", tlsConfig.CertFile) } if len(tlsConfig.KeyFile) > 0 && len(tlsConfig.CertFile) == 0 { return errors.Errorf("client key file %q specified without client cert file", tlsConfig.KeyFile) } return nil } // CheckRules validates rule files. 
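// A minimal illustrative run (the file name and rule count are placeholders):
//
//	$ promtool check rules rules.yml
//	Checking rules.yml
//	  SUCCESS: 3 rules found
//
// The exit status is 0 when every file passes and 1 if any file fails.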
func CheckRules(files ...string) int { failed := false for _, f := range files { if n, errs := checkRules(f); errs != nil { fmt.Fprintln(os.Stderr, " FAILED:") for _, e := range errs { fmt.Fprintln(os.Stderr, e.Error()) } failed = true } else { fmt.Printf(" SUCCESS: %d rules found\n", n) } fmt.Println() } if failed { return 1 } return 0 } func checkRules(filename string) (int, []error) { fmt.Println("Checking", filename) rgs, errs := rulefmt.ParseFile(filename) if errs != nil { return 0, errs } numRules := 0 for _, rg := range rgs.Groups { numRules += len(rg.Rules) } dRules := checkDuplicates(rgs.Groups) if len(dRules) != 0 { fmt.Printf("%d duplicate rules(s) found.\n", len(dRules)) for _, n := range dRules { fmt.Printf("Metric: %s\nLabel(s):\n", n.metric) for i, l := range n.label { fmt.Printf("\t%s: %s\n", i, l) } } fmt.Println("Might cause inconsistency while recording expressions.") } return numRules, nil } type compareRuleType struct { metric string label map[string]string } func checkDuplicates(groups []rulefmt.RuleGroup) []compareRuleType { var duplicates []compareRuleType for _, group := range groups { for index, rule := range group.Rules { inst := compareRuleType{ metric: ruleMetric(rule), label: rule.Labels, } for i := 0; i < index; i++ { t := compareRuleType{ metric: ruleMetric(group.Rules[i]), label: group.Rules[i].Labels, } if reflect.DeepEqual(t, inst) { duplicates = append(duplicates, t) } } } } return duplicates } func ruleMetric(rule rulefmt.Rule) string { if rule.Alert != "" { return rule.Alert } return rule.Record } var checkMetricsUsage = strings.TrimSpace(` Pass Prometheus metrics over stdin to lint them for consistency and correctness. examples: $ cat metrics.prom | promtool check metrics $ curl -s http://localhost:9090/metrics | promtool check metrics `) // CheckMetrics performs a linting pass on input metrics. func CheckMetrics() int { l := promlint.New(os.Stdin) problems, err := l.Lint() if err != nil { fmt.Fprintln(os.Stderr, "error while linting:", err) return 1 } for _, p := range problems { fmt.Fprintln(os.Stderr, p.Metric, p.Text) } if len(problems) > 0 { return 3 } return 0 } // QueryInstant performs an instant query against a Prometheus server. func QueryInstant(url, query string, p printer) int { config := api.Config{ Address: url, } // Create new client. c, err := api.NewClient(config) if err != nil { fmt.Fprintln(os.Stderr, "error creating API client:", err) return 1 } // Run query against client. api := v1.NewAPI(c) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) val, _, err := api.Query(ctx, query, time.Now()) // Ignoring warnings for now. cancel() if err != nil { fmt.Fprintln(os.Stderr, "query error:", err) return 1 } p.printValue(val) return 0 } // QueryRange performs a range query against a Prometheus server. func QueryRange(url string, headers map[string]string, query, start, end string, step time.Duration, p printer) int { config := api.Config{ Address: url, } if len(headers) > 0 { config.RoundTripper = promhttp.RoundTripperFunc(func(req *http.Request) (*http.Response, error) { for key, value := range headers { req.Header.Add(key, value) } return http.DefaultTransport.RoundTrip(req) }) } // Create new client. 
c, err := api.NewClient(config) if err != nil { fmt.Fprintln(os.Stderr, "error creating API client:", err) return 1 } var stime, etime time.Time if end == "" { etime = time.Now() } else { etime, err = parseTime(end) if err != nil { fmt.Fprintln(os.Stderr, "error parsing end time:", err) return 1 } } if start == "" { stime = etime.Add(-5 * time.Minute) } else { stime, err = parseTime(start) if err != nil { fmt.Fprintln(os.Stderr, "error parsing start time:", err) return 1 } } if !stime.Before(etime) { fmt.Fprintln(os.Stderr, "start time is not before end time") return 1 } if step == 0 { resolution := math.Max(math.Floor(etime.Sub(stime).Seconds()/250), 1) // Convert the resolution (a count of seconds) into a time.Duration step. step = time.Duration(resolution) * time.Second } // Run query against client. api := v1.NewAPI(c) r := v1.Range{Start: stime, End: etime, Step: step} ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) val, _, err := api.QueryRange(ctx, query, r) // Ignoring warnings for now. cancel() if err != nil { fmt.Fprintln(os.Stderr, "query error:", err) return 1 } p.printValue(val) return 0 } // QuerySeries queries for a series against a Prometheus server. func QuerySeries(url *url.URL, matchers []string, start, end string, p printer) int { config := api.Config{ Address: url.String(), } // Create new client. c, err := api.NewClient(config) if err != nil { fmt.Fprintln(os.Stderr, "error creating API client:", err) return 1 } // TODO: clean up timestamps var ( minTime = time.Now().Add(-9999 * time.Hour) maxTime = time.Now().Add(9999 * time.Hour) ) var stime, etime time.Time if start == "" { stime = minTime } else { stime, err = parseTime(start) if err != nil { fmt.Fprintln(os.Stderr, "error parsing start time:", err) return 1 } } if end == "" { etime = maxTime } else { etime, err = parseTime(end) if err != nil { fmt.Fprintln(os.Stderr, "error parsing end time:", err) return 1 } } // Run query against client. api := v1.NewAPI(c) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) val, _, err := api.Series(ctx, matchers, stime, etime) // Ignoring warnings for now. cancel() if err != nil { fmt.Fprintln(os.Stderr, "query error:", err) return 1 } p.printSeries(val) return 0 } // QueryLabels queries for label values against a Prometheus server. func QueryLabels(url *url.URL, name string, p printer) int { config := api.Config{ Address: url.String(), } // Create new client. c, err := api.NewClient(config) if err != nil { fmt.Fprintln(os.Stderr, "error creating API client:", err) return 1 } // Run query against client. 
api := v1.NewAPI(c) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) val, warn, err := api.LabelValues(ctx, name) cancel() for _, v := range warn { fmt.Fprintln(os.Stderr, "query warning:", v) } if err != nil { fmt.Fprintln(os.Stderr, "query error:", err) return 1 } p.printLabelValues(val) return 0 } func parseTime(s string) (time.Time, error) { if t, err := strconv.ParseFloat(s, 64); err == nil { s, ns := math.Modf(t) return time.Unix(int64(s), int64(ns*float64(time.Second))), nil } if t, err := time.Parse(time.RFC3339Nano, s); err == nil { return t, nil } return time.Time{}, errors.Errorf("cannot parse %q to a valid timestamp", s) } type endpointsGroup struct { urlToFilename map[string]string postProcess func(b []byte) ([]byte, error) } var ( pprofEndpoints = []endpointsGroup{ { urlToFilename: map[string]string{ "/debug/pprof/profile?seconds=30": "cpu.pb", "/debug/pprof/block": "block.pb", "/debug/pprof/goroutine": "goroutine.pb", "/debug/pprof/heap": "heap.pb", "/debug/pprof/mutex": "mutex.pb", "/debug/pprof/threadcreate": "threadcreate.pb", }, postProcess: func(b []byte) ([]byte, error) { p, err := profile.Parse(bytes.NewReader(b)) if err != nil { return nil, err } var buf bytes.Buffer if err := p.WriteUncompressed(&buf); err != nil { return nil, errors.Wrap(err, "writing the profile to the buffer") } return buf.Bytes(), nil }, }, { urlToFilename: map[string]string{ "/debug/pprof/trace?seconds=30": "trace.pb", }, }, } metricsEndpoints = []endpointsGroup{ { urlToFilename: map[string]string{ "/metrics": "metrics.txt", }, }, } allEndpoints = append(pprofEndpoints, metricsEndpoints...) ) func debugPprof(url string) int { if err := debugWrite(debugWriterConfig{ serverURL: url, tarballName: "debug.tar.gz", endPointGroups: pprofEndpoints, }); err != nil { fmt.Fprintln(os.Stderr, "error completing debug command:", err) return 1 } return 0 } func debugMetrics(url string) int { if err := debugWrite(debugWriterConfig{ serverURL: url, tarballName: "debug.tar.gz", endPointGroups: metricsEndpoints, }); err != nil { fmt.Fprintln(os.Stderr, "error completing debug command:", err) return 1 } return 0 } func debugAll(url string) int { if err := debugWrite(debugWriterConfig{ serverURL: url, tarballName: "debug.tar.gz", endPointGroups: allEndpoints, }); err != nil { fmt.Fprintln(os.Stderr, "error completing debug command:", err) return 1 } return 0 } type printer interface { printValue(v model.Value) printSeries(v []model.LabelSet) printLabelValues(v model.LabelValues) } type promqlPrinter struct{} func (p *promqlPrinter) printValue(v model.Value) { fmt.Println(v) } func (p *promqlPrinter) printSeries(val []model.LabelSet) { for _, v := range val { fmt.Println(v) } } func (p *promqlPrinter) printLabelValues(val model.LabelValues) { for _, v := range val { fmt.Println(v) } } type jsonPrinter struct{} func (j *jsonPrinter) printValue(v model.Value) { //nolint:errcheck json.NewEncoder(os.Stdout).Encode(v) } func (j *jsonPrinter) printSeries(v []model.LabelSet) { //nolint:errcheck json.NewEncoder(os.Stdout).Encode(v) } func (j *jsonPrinter) printLabelValues(v model.LabelValues) { //nolint:errcheck json.NewEncoder(os.Stdout).Encode(v) } prometheus-2.15.2+ds/cmd/promtool/main_test.go000066400000000000000000000042641360540074000213330ustar00rootroot00000000000000// Copyright 2018 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package main import ( "fmt" "net/http" "net/http/httptest" "testing" "time" ) func TestQueryRange(t *testing.T) { s, getRequest := mockServer(200, `{"status": "success", "data": {"resultType": "matrix", "result": []}}`) defer s.Close() p := &promqlPrinter{} exitCode := QueryRange(s.URL, map[string]string{}, "up", "0", "300", 0, p) expectedPath := "/api/v1/query_range" gotPath := getRequest().URL.Path if gotPath != expectedPath { t.Errorf("unexpected URL path %s (wanted %s)", gotPath, expectedPath) } form := getRequest().Form actual := form.Get("query") if actual != "up" { t.Errorf("unexpected value %s for query", actual) } actual = form.Get("step") if actual != "1" { t.Errorf("unexpected value %s for step", actual) } if exitCode > 0 { t.Error() } exitCode = QueryRange(s.URL, map[string]string{}, "up", "0", "300", 10*time.Millisecond, p) gotPath = getRequest().URL.Path if gotPath != expectedPath { t.Errorf("unexpected URL path %s (wanted %s)", gotPath, expectedPath) } form = getRequest().Form actual = form.Get("query") if actual != "up" { t.Errorf("unexpected value %s for query", actual) } actual = form.Get("step") if actual != "0.01" { t.Errorf("unexpected value %s for step", actual) } if exitCode > 0 { t.Error() } } func mockServer(code int, body string) (*httptest.Server, func() *http.Request) { var req *http.Request server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { r.ParseForm() req = r w.WriteHeader(code) fmt.Fprintln(w, body) })) f := func() *http.Request { return req } return server, f } prometheus-2.15.2+ds/cmd/promtool/unittest.go000066400000000000000000000351001360540074000212200ustar00rootroot00000000000000// Copyright 2018 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package main import ( "context" "fmt" "io/ioutil" "os" "path/filepath" "reflect" "sort" "strconv" "strings" "time" "github.com/go-kit/kit/log" "github.com/pkg/errors" yaml "gopkg.in/yaml.v2" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/storage" ) // RulesUnitTest does unit testing of rules based on the unit testing files provided. // More info about the file format can be found in the docs. 
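// A minimal test-file sketch using the YAML keys defined by the types in this
// file (the rule file name, series, and expected values are illustrative):
//
//	rule_files:
//	  - alerts.yml
//	evaluation_interval: 1m
//	tests:
//	  - interval: 1m
//	    input_series:
//	      - series: 'up{job="prometheus"}'
//	        values: '1 1 1 0 0'
//	    promql_expr_test:
//	      - expr: up
//	        eval_time: 2m
//	        exp_samples:
//	          - labels: 'up{job="prometheus"}'
//	            value: 1
//	    alert_rule_test:
//	      - eval_time: 5m
//	        alertname: InstanceDown
//	        exp_alerts:
//	          - exp_labels:
//	              job: prometheus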
func RulesUnitTest(files ...string) int { failed := false for _, f := range files { if errs := ruleUnitTest(f); errs != nil { fmt.Fprintln(os.Stderr, " FAILED:") for _, e := range errs { fmt.Fprintln(os.Stderr, e.Error()) } failed = true } else { fmt.Println(" SUCCESS") } fmt.Println() } if failed { return 1 } return 0 } func ruleUnitTest(filename string) []error { fmt.Println("Unit Testing: ", filename) b, err := ioutil.ReadFile(filename) if err != nil { return []error{err} } var unitTestInp unitTestFile if err := yaml.UnmarshalStrict(b, &unitTestInp); err != nil { return []error{err} } if err := resolveAndGlobFilepaths(filepath.Dir(filename), &unitTestInp); err != nil { return []error{err} } if unitTestInp.EvaluationInterval == 0 { unitTestInp.EvaluationInterval = 1 * time.Minute } // Bounds for evaluating the rules. mint := time.Unix(0, 0) maxd := unitTestInp.maxEvalTime() maxt := mint.Add(maxd) // Rounding off to nearest Eval time (> maxt). maxt = maxt.Add(unitTestInp.EvaluationInterval / 2).Round(unitTestInp.EvaluationInterval) // Giving number for groups mentioned in the file for ordering. // Lower number group should be evaluated before higher number group. groupOrderMap := make(map[string]int) for i, gn := range unitTestInp.GroupEvalOrder { if _, ok := groupOrderMap[gn]; ok { return []error{errors.Errorf("group name repeated in evaluation order: %s", gn)} } groupOrderMap[gn] = i } // Testing. var errs []error for _, t := range unitTestInp.Tests { ers := t.test(mint, maxt, unitTestInp.EvaluationInterval, groupOrderMap, unitTestInp.RuleFiles...) if ers != nil { errs = append(errs, ers...) } } if len(errs) > 0 { return errs } return nil } // unitTestFile holds the contents of a single unit test file. type unitTestFile struct { RuleFiles []string `yaml:"rule_files"` EvaluationInterval time.Duration `yaml:"evaluation_interval,omitempty"` GroupEvalOrder []string `yaml:"group_eval_order"` Tests []testGroup `yaml:"tests"` } func (utf *unitTestFile) maxEvalTime() time.Duration { var maxd time.Duration for _, t := range utf.Tests { d := t.maxEvalTime() if d > maxd { maxd = d } } return maxd } // resolveAndGlobFilepaths joins all relative paths in a configuration // with a given base directory and replaces all globs with matching files. func resolveAndGlobFilepaths(baseDir string, utf *unitTestFile) error { for i, rf := range utf.RuleFiles { if rf != "" && !filepath.IsAbs(rf) { utf.RuleFiles[i] = filepath.Join(baseDir, rf) } } var globbedFiles []string for _, rf := range utf.RuleFiles { m, err := filepath.Glob(rf) if err != nil { return err } if len(m) <= 0 { fmt.Fprintln(os.Stderr, " WARNING: no file match pattern", rf) } globbedFiles = append(globbedFiles, m...) } utf.RuleFiles = globbedFiles return nil } // testGroup is a group of input series and tests associated with it. type testGroup struct { Interval time.Duration `yaml:"interval"` InputSeries []series `yaml:"input_series"` AlertRuleTests []alertTestCase `yaml:"alert_rule_test,omitempty"` PromqlExprTests []promqlTestCase `yaml:"promql_expr_test,omitempty"` ExternalLabels labels.Labels `yaml:"external_labels,omitempty"` } // test performs the unit tests. func (tg *testGroup) test(mint, maxt time.Time, evalInterval time.Duration, groupOrderMap map[string]int, ruleFiles ...string) []error { // Setup testing suite. suite, err := promql.NewLazyLoader(nil, tg.seriesLoadingString()) if err != nil { return []error{err} } defer suite.Close() // Load the rule files. 
opts := &rules.ManagerOptions{ QueryFunc: rules.EngineQueryFunc(suite.QueryEngine(), suite.Storage()), Appendable: suite.Storage(), Context: context.Background(), NotifyFunc: func(ctx context.Context, expr string, alerts ...*rules.Alert) {}, Logger: log.NewNopLogger(), } m := rules.NewManager(opts) groupsMap, ers := m.LoadGroups(tg.Interval, tg.ExternalLabels, ruleFiles...) if ers != nil { return ers } groups := orderedGroups(groupsMap, groupOrderMap) // Pre-processing some data for testing alerts. // All this preparation is so that we can test alerts as we evaluate the rules. // This avoids storing them in memory, as the number of evals might be high. // All the `eval_time` for which we have unit tests for alerts. alertEvalTimesMap := map[time.Duration]struct{}{} // Map of all the eval_time+alertname combination present in the unit tests. alertsInTest := make(map[time.Duration]map[string]struct{}) // Map of all the unit tests for given eval_time. alertTests := make(map[time.Duration][]alertTestCase) for _, alert := range tg.AlertRuleTests { alertEvalTimesMap[alert.EvalTime] = struct{}{} if _, ok := alertsInTest[alert.EvalTime]; !ok { alertsInTest[alert.EvalTime] = make(map[string]struct{}) } alertsInTest[alert.EvalTime][alert.Alertname] = struct{}{} alertTests[alert.EvalTime] = append(alertTests[alert.EvalTime], alert) } alertEvalTimes := make([]time.Duration, 0, len(alertEvalTimesMap)) for k := range alertEvalTimesMap { alertEvalTimes = append(alertEvalTimes, k) } sort.Slice(alertEvalTimes, func(i, j int) bool { return alertEvalTimes[i] < alertEvalTimes[j] }) // Current index in alertEvalTimes what we are looking at. curr := 0 var errs []error for ts := mint; ts.Before(maxt); ts = ts.Add(evalInterval) { // Collects the alerts asked for unit testing. suite.WithSamplesTill(ts, func(err error) { if err != nil { errs = append(errs, err) return } for _, g := range groups { g.Eval(suite.Context(), ts) for _, r := range g.Rules() { if r.LastError() != nil { errs = append(errs, errors.Errorf(" rule: %s, time: %s, err: %v", r.Name(), ts.Sub(time.Unix(0, 0)), r.LastError())) } } } }) if len(errs) > 0 { return errs } for { if !(curr < len(alertEvalTimes) && ts.Sub(mint) <= alertEvalTimes[curr] && alertEvalTimes[curr] < ts.Add(evalInterval).Sub(mint)) { break } // We need to check alerts for this time. // If 'ts <= `eval_time=alertEvalTimes[curr]` < ts+evalInterval' // then we compare alerts with the Eval at `ts`. t := alertEvalTimes[curr] presentAlerts := alertsInTest[t] got := make(map[string]labelsAndAnnotations) // Same Alert name can be present in multiple groups. // Hence we collect them all to check against expected alerts. for _, g := range groups { grules := g.Rules() for _, r := range grules { ar, ok := r.(*rules.AlertingRule) if !ok { continue } if _, ok := presentAlerts[ar.Name()]; !ok { continue } var alerts labelsAndAnnotations for _, a := range ar.ActiveAlerts() { if a.State == rules.StateFiring { alerts = append(alerts, labelAndAnnotation{ Labels: append(labels.Labels{}, a.Labels...), Annotations: append(labels.Labels{}, a.Annotations...), }) } } got[ar.Name()] = append(got[ar.Name()], alerts...) } } for _, testcase := range alertTests[t] { // Checking alerts. gotAlerts := got[testcase.Alertname] var expAlerts labelsAndAnnotations for _, a := range testcase.ExpAlerts { // User gives only the labels from alerting rule, which doesn't // include this label (added by Prometheus during Eval). 
if a.ExpLabels == nil { a.ExpLabels = make(map[string]string) } a.ExpLabels[labels.AlertName] = testcase.Alertname expAlerts = append(expAlerts, labelAndAnnotation{ Labels: labels.FromMap(a.ExpLabels), Annotations: labels.FromMap(a.ExpAnnotations), }) } if gotAlerts.Len() != expAlerts.Len() { errs = append(errs, errors.Errorf(" alertname:%s, time:%s, \n exp:%#v, \n got:%#v", testcase.Alertname, testcase.EvalTime.String(), expAlerts.String(), gotAlerts.String())) } else { sort.Sort(gotAlerts) sort.Sort(expAlerts) if !reflect.DeepEqual(expAlerts, gotAlerts) { errs = append(errs, errors.Errorf(" alertname:%s, time:%s, \n exp:%#v, \n got:%#v", testcase.Alertname, testcase.EvalTime.String(), expAlerts.String(), gotAlerts.String())) } } } curr++ } } // Checking promql expressions. Outer: for _, testCase := range tg.PromqlExprTests { got, err := query(suite.Context(), testCase.Expr, mint.Add(testCase.EvalTime), suite.QueryEngine(), suite.Queryable()) if err != nil { errs = append(errs, errors.Errorf(" expr: %q, time: %s, err: %s", testCase.Expr, testCase.EvalTime.String(), err.Error())) continue } var gotSamples []parsedSample for _, s := range got { gotSamples = append(gotSamples, parsedSample{ Labels: s.Metric.Copy(), Value: s.V, }) } var expSamples []parsedSample for _, s := range testCase.ExpSamples { lb, err := promql.ParseMetric(s.Labels) if err != nil { err = errors.Wrapf(err, "labels %q", s.Labels) errs = append(errs, errors.Errorf(" expr: %q, time: %s, err: %s", testCase.Expr, testCase.EvalTime.String(), err.Error())) continue Outer } expSamples = append(expSamples, parsedSample{ Labels: lb, Value: s.Value, }) } sort.Slice(expSamples, func(i, j int) bool { return labels.Compare(expSamples[i].Labels, expSamples[j].Labels) <= 0 }) sort.Slice(gotSamples, func(i, j int) bool { return labels.Compare(gotSamples[i].Labels, gotSamples[j].Labels) <= 0 }) if !reflect.DeepEqual(expSamples, gotSamples) { errs = append(errs, errors.Errorf(" expr: %q, time: %s,\n exp:%#v\n got:%#v", testCase.Expr, testCase.EvalTime.String(), parsedSamplesString(expSamples), parsedSamplesString(gotSamples))) } } if len(errs) > 0 { return errs } return nil } // seriesLoadingString returns the input series in PromQL notation. func (tg *testGroup) seriesLoadingString() string { result := "" result += "load " + shortDuration(tg.Interval) + "\n" for _, is := range tg.InputSeries { result += " " + is.Series + " " + is.Values + "\n" } return result } func shortDuration(d time.Duration) string { s := d.String() if strings.HasSuffix(s, "m0s") { s = s[:len(s)-2] } if strings.HasSuffix(s, "h0m") { s = s[:len(s)-2] } return s } // orderedGroups returns a slice of `*rules.Group` from `groupsMap` which follows the order // mentioned by `groupOrderMap`. NOTE: This is partial ordering. func orderedGroups(groupsMap map[string]*rules.Group, groupOrderMap map[string]int) []*rules.Group { groups := make([]*rules.Group, 0, len(groupsMap)) for _, g := range groupsMap { groups = append(groups, g) } sort.Slice(groups, func(i, j int) bool { return groupOrderMap[groups[i].Name()] < groupOrderMap[groups[j].Name()] }) return groups } // maxEvalTime returns the max eval time among all alert and promql unit tests. 
func (tg *testGroup) maxEvalTime() time.Duration { var maxd time.Duration for _, alert := range tg.AlertRuleTests { if alert.EvalTime > maxd { maxd = alert.EvalTime } } for _, pet := range tg.PromqlExprTests { if pet.EvalTime > maxd { maxd = pet.EvalTime } } return maxd } func query(ctx context.Context, qs string, t time.Time, engine *promql.Engine, qu storage.Queryable) (promql.Vector, error) { q, err := engine.NewInstantQuery(qu, qs, t) if err != nil { return nil, err } res := q.Exec(ctx) if res.Err != nil { return nil, res.Err } switch v := res.Value.(type) { case promql.Vector: return v, nil case promql.Scalar: return promql.Vector{promql.Sample{ Point: promql.Point(v), Metric: labels.Labels{}, }}, nil default: return nil, errors.New("rule result is not a vector or scalar") } } type labelsAndAnnotations []labelAndAnnotation func (la labelsAndAnnotations) Len() int { return len(la) } func (la labelsAndAnnotations) Swap(i, j int) { la[i], la[j] = la[j], la[i] } func (la labelsAndAnnotations) Less(i, j int) bool { diff := labels.Compare(la[i].Labels, la[j].Labels) if diff != 0 { return diff < 0 } return labels.Compare(la[i].Annotations, la[j].Annotations) < 0 } func (la labelsAndAnnotations) String() string { if len(la) == 0 { return "[]" } s := "[" + la[0].String() for _, l := range la[1:] { s += ", " + l.String() } s += "]" return s } type labelAndAnnotation struct { Labels labels.Labels Annotations labels.Labels } func (la *labelAndAnnotation) String() string { return "Labels:" + la.Labels.String() + " Annotations:" + la.Annotations.String() } type series struct { Series string `yaml:"series"` Values string `yaml:"values"` } type alertTestCase struct { EvalTime time.Duration `yaml:"eval_time"` Alertname string `yaml:"alertname"` ExpAlerts []alert `yaml:"exp_alerts"` } type alert struct { ExpLabels map[string]string `yaml:"exp_labels"` ExpAnnotations map[string]string `yaml:"exp_annotations"` } type promqlTestCase struct { Expr string `yaml:"expr"` EvalTime time.Duration `yaml:"eval_time"` ExpSamples []sample `yaml:"exp_samples"` } type sample struct { Labels string `yaml:"labels"` Value float64 `yaml:"value"` } // parsedSample is a sample with parsed Labels. type parsedSample struct { Labels labels.Labels Value float64 } func parsedSamplesString(pss []parsedSample) string { if len(pss) == 0 { return "nil" } s := pss[0].String() for _, ps := range pss[1:] { s += ", " + ps.String() } return s } func (ps *parsedSample) String() string { return ps.Labels.String() + " " + strconv.FormatFloat(ps.Value, 'E', -1, 64) } prometheus-2.15.2+ds/code-of-conduct.md000066400000000000000000000002331360540074000176660ustar00rootroot00000000000000## Prometheus Community Code of Conduct Prometheus follows the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/master/code-of-conduct.md). prometheus-2.15.2+ds/config/000077500000000000000000000000001360540074000156425ustar00rootroot00000000000000prometheus-2.15.2+ds/config/config.go000066400000000000000000000564351360540074000174530ustar00rootroot00000000000000// Copyright 2015 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. package config import ( "fmt" "io/ioutil" "net/url" "path/filepath" "regexp" "strings" "time" "github.com/pkg/errors" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" yaml "gopkg.in/yaml.v2" sd_config "github.com/prometheus/prometheus/discovery/config" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/pkg/relabel" ) var ( patRulePath = regexp.MustCompile(`^[^*]*(\*[^/]*)?$`) ) // Load parses the YAML input s into a Config. func Load(s string) (*Config, error) { cfg := &Config{} // If the entire config body is empty the UnmarshalYAML method is // never called. We thus have to set the DefaultConfig at the entry // point as well. *cfg = DefaultConfig err := yaml.UnmarshalStrict([]byte(s), cfg) if err != nil { return nil, err } cfg.original = s return cfg, nil } // LoadFile parses the given YAML file into a Config. func LoadFile(filename string) (*Config, error) { content, err := ioutil.ReadFile(filename) if err != nil { return nil, err } cfg, err := Load(string(content)) if err != nil { return nil, errors.Wrapf(err, "parsing YAML file %s", filename) } resolveFilepaths(filepath.Dir(filename), cfg) return cfg, nil } // The defaults applied before parsing the respective config sections. var ( // DefaultConfig is the default top-level configuration. DefaultConfig = Config{ GlobalConfig: DefaultGlobalConfig, } // DefaultGlobalConfig is the default global configuration. DefaultGlobalConfig = GlobalConfig{ ScrapeInterval: model.Duration(1 * time.Minute), ScrapeTimeout: model.Duration(10 * time.Second), EvaluationInterval: model.Duration(1 * time.Minute), } // DefaultScrapeConfig is the default scrape configuration. DefaultScrapeConfig = ScrapeConfig{ // ScrapeTimeout and ScrapeInterval default to the // configured globals. MetricsPath: "/metrics", Scheme: "http", HonorLabels: false, HonorTimestamps: true, } // DefaultAlertmanagerConfig is the default alertmanager configuration. DefaultAlertmanagerConfig = AlertmanagerConfig{ Scheme: "http", Timeout: model.Duration(10 * time.Second), APIVersion: AlertmanagerAPIVersionV1, } // DefaultRemoteWriteConfig is the default remote write configuration. DefaultRemoteWriteConfig = RemoteWriteConfig{ RemoteTimeout: model.Duration(30 * time.Second), QueueConfig: DefaultQueueConfig, } // DefaultQueueConfig is the default remote queue configuration. DefaultQueueConfig = QueueConfig{ // With a maximum of 1000 shards, assuming an average of 100ms remote write // time and 100 samples per batch, we will be able to push 1M samples/s. MaxShards: 1000, MinShards: 1, MaxSamplesPerSend: 100, // Each shard will have a max of 500 samples pending in it's channel, plus the pending // samples that have been enqueued. Theoretically we should only ever have about 600 samples // per shard pending. At 1000 shards that's 600k. Capacity: 500, BatchSendDeadline: model.Duration(5 * time.Second), // Backoff times for retrying a batch of samples on recoverable errors. MinBackoff: model.Duration(30 * time.Millisecond), MaxBackoff: model.Duration(100 * time.Millisecond), } // DefaultRemoteReadConfig is the default remote read configuration. DefaultRemoteReadConfig = RemoteReadConfig{ RemoteTimeout: model.Duration(1 * time.Minute), } ) // Config is the top-level configuration for Prometheus's config files. 
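// A minimal configuration sketch exercising the top-level sections mapped by
// this struct (the targets, file globs, and URLs are placeholders):
//
//	global:
//	  scrape_interval: 15s
//	  evaluation_interval: 30s
//	rule_files:
//	  - rules/*.rules
//	scrape_configs:
//	  - job_name: prometheus
//	    static_configs:
//	      - targets: ['localhost:9090']
//	remote_write:
//	  - url: http://remote1/push
//	remote_read:
//	  - url: http://remote1/read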
type Config struct { GlobalConfig GlobalConfig `yaml:"global"` AlertingConfig AlertingConfig `yaml:"alerting,omitempty"` RuleFiles []string `yaml:"rule_files,omitempty"` ScrapeConfigs []*ScrapeConfig `yaml:"scrape_configs,omitempty"` RemoteWriteConfigs []*RemoteWriteConfig `yaml:"remote_write,omitempty"` RemoteReadConfigs []*RemoteReadConfig `yaml:"remote_read,omitempty"` // original is the input from which the config was parsed. original string } // resolveFilepaths joins all relative paths in a configuration // with a given base directory. func resolveFilepaths(baseDir string, cfg *Config) { join := func(fp string) string { if len(fp) > 0 && !filepath.IsAbs(fp) { fp = filepath.Join(baseDir, fp) } return fp } for i, rf := range cfg.RuleFiles { cfg.RuleFiles[i] = join(rf) } tlsPaths := func(cfg *config_util.TLSConfig) { cfg.CAFile = join(cfg.CAFile) cfg.CertFile = join(cfg.CertFile) cfg.KeyFile = join(cfg.KeyFile) } clientPaths := func(scfg *config_util.HTTPClientConfig) { if scfg.BasicAuth != nil { scfg.BasicAuth.PasswordFile = join(scfg.BasicAuth.PasswordFile) } scfg.BearerTokenFile = join(scfg.BearerTokenFile) tlsPaths(&scfg.TLSConfig) } sdPaths := func(cfg *sd_config.ServiceDiscoveryConfig) { for _, kcfg := range cfg.KubernetesSDConfigs { clientPaths(&kcfg.HTTPClientConfig) } for _, mcfg := range cfg.MarathonSDConfigs { mcfg.AuthTokenFile = join(mcfg.AuthTokenFile) clientPaths(&mcfg.HTTPClientConfig) } for _, consulcfg := range cfg.ConsulSDConfigs { tlsPaths(&consulcfg.TLSConfig) } for _, cfg := range cfg.OpenstackSDConfigs { tlsPaths(&cfg.TLSConfig) } for _, cfg := range cfg.TritonSDConfigs { tlsPaths(&cfg.TLSConfig) } for _, filecfg := range cfg.FileSDConfigs { for i, fn := range filecfg.Files { filecfg.Files[i] = join(fn) } } } for _, cfg := range cfg.ScrapeConfigs { clientPaths(&cfg.HTTPClientConfig) sdPaths(&cfg.ServiceDiscoveryConfig) } for _, cfg := range cfg.AlertingConfig.AlertmanagerConfigs { clientPaths(&cfg.HTTPClientConfig) sdPaths(&cfg.ServiceDiscoveryConfig) } for _, cfg := range cfg.RemoteReadConfigs { clientPaths(&cfg.HTTPClientConfig) } for _, cfg := range cfg.RemoteWriteConfigs { clientPaths(&cfg.HTTPClientConfig) } } func (c Config) String() string { b, err := yaml.Marshal(c) if err != nil { return fmt.Sprintf("", err) } return string(b) } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = DefaultConfig // We want to set c to the defaults and then overwrite it with the input. // To make unmarshal fill the plain data struct rather than calling UnmarshalYAML // again, we have to hide it using a type indirection. type plain Config if err := unmarshal((*plain)(c)); err != nil { return err } // If a global block was open but empty the default global config is overwritten. // We have to restore it here. if c.GlobalConfig.isZero() { c.GlobalConfig = DefaultGlobalConfig } for _, rf := range c.RuleFiles { if !patRulePath.MatchString(rf) { return errors.Errorf("invalid rule file path %q", rf) } } // Do global overrides and validate unique names. jobNames := map[string]struct{}{} for _, scfg := range c.ScrapeConfigs { if scfg == nil { return errors.New("empty or null scrape config section") } // First set the correct scrape interval, then check that the timeout // (inferred or explicit) is not greater than that. 
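// For example, assuming the package defaults above (scrape_interval 1m,
// scrape_timeout 10s): a job that sets neither field ends up with a 1m
// interval and a 10s timeout, while a job that only sets scrape_interval: 5s
// has its timeout capped to 5s because the global 10s timeout would exceed
// the interval.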
if scfg.ScrapeInterval == 0 { scfg.ScrapeInterval = c.GlobalConfig.ScrapeInterval } if scfg.ScrapeTimeout > scfg.ScrapeInterval { return errors.Errorf("scrape timeout greater than scrape interval for scrape config with job name %q", scfg.JobName) } if scfg.ScrapeTimeout == 0 { if c.GlobalConfig.ScrapeTimeout > scfg.ScrapeInterval { scfg.ScrapeTimeout = scfg.ScrapeInterval } else { scfg.ScrapeTimeout = c.GlobalConfig.ScrapeTimeout } } if _, ok := jobNames[scfg.JobName]; ok { return errors.Errorf("found multiple scrape configs with job name %q", scfg.JobName) } jobNames[scfg.JobName] = struct{}{} } rwNames := map[string]struct{}{} for _, rwcfg := range c.RemoteWriteConfigs { if rwcfg == nil { return errors.New("empty or null remote write config section") } // Skip empty names, we fill their name with their config hash in remote write code. if _, ok := rwNames[rwcfg.Name]; ok && rwcfg.Name != "" { return errors.Errorf("found multiple remote write configs with job name %q", rwcfg.Name) } rwNames[rwcfg.Name] = struct{}{} } rrNames := map[string]struct{}{} for _, rrcfg := range c.RemoteReadConfigs { if rrcfg == nil { return errors.New("empty or null remote read config section") } // Skip empty names, we fill their name with their config hash in remote read code. if _, ok := rrNames[rrcfg.Name]; ok && rrcfg.Name != "" { return errors.Errorf("found multiple remote read configs with job name %q", rrcfg.Name) } rrNames[rrcfg.Name] = struct{}{} } return nil } // GlobalConfig configures values that are used across other configuration // objects. type GlobalConfig struct { // How frequently to scrape targets by default. ScrapeInterval model.Duration `yaml:"scrape_interval,omitempty"` // The default timeout when scraping targets. ScrapeTimeout model.Duration `yaml:"scrape_timeout,omitempty"` // How frequently to evaluate rules by default. EvaluationInterval model.Duration `yaml:"evaluation_interval,omitempty"` // The labels to add to any timeseries that this Prometheus instance scrapes. ExternalLabels labels.Labels `yaml:"external_labels,omitempty"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *GlobalConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { // Create a clean global config as the previous one was already populated // by the default due to the YAML parser behavior for empty blocks. gc := &GlobalConfig{} type plain GlobalConfig if err := unmarshal((*plain)(gc)); err != nil { return err } for _, l := range gc.ExternalLabels { if !model.LabelName(l.Name).IsValid() { return errors.Errorf("%q is not a valid label name", l.Name) } if !model.LabelValue(l.Value).IsValid() { return errors.Errorf("%q is not a valid label value", l.Value) } } // First set the correct scrape interval, then check that the timeout // (inferred or explicit) is not greater than that. if gc.ScrapeInterval == 0 { gc.ScrapeInterval = DefaultGlobalConfig.ScrapeInterval } if gc.ScrapeTimeout > gc.ScrapeInterval { return errors.New("global scrape timeout greater than scrape interval") } if gc.ScrapeTimeout == 0 { if DefaultGlobalConfig.ScrapeTimeout > gc.ScrapeInterval { gc.ScrapeTimeout = gc.ScrapeInterval } else { gc.ScrapeTimeout = DefaultGlobalConfig.ScrapeTimeout } } if gc.EvaluationInterval == 0 { gc.EvaluationInterval = DefaultGlobalConfig.EvaluationInterval } *c = *gc return nil } // isZero returns true iff the global config is the zero value. 
func (c *GlobalConfig) isZero() bool { return c.ExternalLabels == nil && c.ScrapeInterval == 0 && c.ScrapeTimeout == 0 && c.EvaluationInterval == 0 } // ScrapeConfig configures a scraping unit for Prometheus. type ScrapeConfig struct { // The job name to which the job label is set by default. JobName string `yaml:"job_name"` // Indicator whether the scraped metrics should remain unmodified. HonorLabels bool `yaml:"honor_labels,omitempty"` // Indicator whether the scraped timestamps should be respected. HonorTimestamps bool `yaml:"honor_timestamps"` // A set of query parameters with which the target is scraped. Params url.Values `yaml:"params,omitempty"` // How frequently to scrape the targets of this scrape config. ScrapeInterval model.Duration `yaml:"scrape_interval,omitempty"` // The timeout for scraping targets of this config. ScrapeTimeout model.Duration `yaml:"scrape_timeout,omitempty"` // The HTTP resource path on which to fetch metrics from targets. MetricsPath string `yaml:"metrics_path,omitempty"` // The URL scheme with which to fetch metrics from targets. Scheme string `yaml:"scheme,omitempty"` // More than this many samples post metric-relabelling will cause the scrape to fail. SampleLimit uint `yaml:"sample_limit,omitempty"` // We cannot do proper Go type embedding below as the parser will then parse // values arbitrarily into the overflow maps of further-down types. ServiceDiscoveryConfig sd_config.ServiceDiscoveryConfig `yaml:",inline"` HTTPClientConfig config_util.HTTPClientConfig `yaml:",inline"` // List of target relabel configurations. RelabelConfigs []*relabel.Config `yaml:"relabel_configs,omitempty"` // List of metric relabel configurations. MetricRelabelConfigs []*relabel.Config `yaml:"metric_relabel_configs,omitempty"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *ScrapeConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = DefaultScrapeConfig type plain ScrapeConfig err := unmarshal((*plain)(c)) if err != nil { return err } if len(c.JobName) == 0 { return errors.New("job_name is empty") } // The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer. // We cannot make it a pointer as the parser panics for inlined pointer structs. // Thus we just do its validation here. if err := c.HTTPClientConfig.Validate(); err != nil { return err } // The UnmarshalYAML method of ServiceDiscoveryConfig is not being called because it's not a pointer. // We cannot make it a pointer as the parser panics for inlined pointer structs. // Thus we just do its validation here. if err := c.ServiceDiscoveryConfig.Validate(); err != nil { return err } // Check for users putting URLs in target groups. if len(c.RelabelConfigs) == 0 { for _, tg := range c.ServiceDiscoveryConfig.StaticConfigs { for _, t := range tg.Targets { if err := CheckTargetAddress(t[model.AddressLabel]); err != nil { return err } } } } for _, rlcfg := range c.RelabelConfigs { if rlcfg == nil { return errors.New("empty or null target relabeling rule in scrape config") } } for _, rlcfg := range c.MetricRelabelConfigs { if rlcfg == nil { return errors.New("empty or null metric relabeling rule in scrape config") } } // Add index to the static config target groups for unique identification // within scrape pool. for i, tg := range c.ServiceDiscoveryConfig.StaticConfigs { tg.Source = fmt.Sprintf("%d", i) } return nil } // AlertingConfig configures alerting and alertmanager related configs. 
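// In a configuration file this is the `alerting` section, for example (the
// Alertmanager address is a placeholder):
//
//	alerting:
//	  alertmanagers:
//	    - scheme: http
//	      timeout: 10s
//	      static_configs:
//	        - targets: ['localhost:9093']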
type AlertingConfig struct { AlertRelabelConfigs []*relabel.Config `yaml:"alert_relabel_configs,omitempty"` AlertmanagerConfigs AlertmanagerConfigs `yaml:"alertmanagers,omitempty"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *AlertingConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { // Create a clean global config as the previous one was already populated // by the default due to the YAML parser behavior for empty blocks. *c = AlertingConfig{} type plain AlertingConfig if err := unmarshal((*plain)(c)); err != nil { return err } for _, rlcfg := range c.AlertRelabelConfigs { if rlcfg == nil { return errors.New("empty or null alert relabeling rule") } } return nil } // AlertmanagerConfigs is a slice of *AlertmanagerConfig. type AlertmanagerConfigs []*AlertmanagerConfig // ToMap converts a slice of *AlertmanagerConfig to a map. func (a AlertmanagerConfigs) ToMap() map[string]*AlertmanagerConfig { ret := make(map[string]*AlertmanagerConfig) for i := range a { ret[fmt.Sprintf("config-%d", i)] = a[i] } return ret } // AlertmanagerAPIVersion represents a version of the // github.com/prometheus/alertmanager/api, e.g. 'v1' or 'v2'. type AlertmanagerAPIVersion string // UnmarshalYAML implements the yaml.Unmarshaler interface. func (v *AlertmanagerAPIVersion) UnmarshalYAML(unmarshal func(interface{}) error) error { *v = AlertmanagerAPIVersion("") type plain AlertmanagerAPIVersion if err := unmarshal((*plain)(v)); err != nil { return err } for _, supportedVersion := range SupportedAlertmanagerAPIVersions { if *v == supportedVersion { return nil } } return fmt.Errorf("expected Alertmanager api version to be one of %v but got %v", SupportedAlertmanagerAPIVersions, *v) } const ( // AlertmanagerAPIVersionV1 represents // github.com/prometheus/alertmanager/api/v1. AlertmanagerAPIVersionV1 AlertmanagerAPIVersion = "v1" // AlertmanagerAPIVersionV2 represents // github.com/prometheus/alertmanager/api/v2. AlertmanagerAPIVersionV2 AlertmanagerAPIVersion = "v2" ) var SupportedAlertmanagerAPIVersions = []AlertmanagerAPIVersion{ AlertmanagerAPIVersionV1, AlertmanagerAPIVersionV2, } // AlertmanagerConfig configures how Alertmanagers can be discovered and communicated with. type AlertmanagerConfig struct { // We cannot do proper Go type embedding below as the parser will then parse // values arbitrarily into the overflow maps of further-down types. ServiceDiscoveryConfig sd_config.ServiceDiscoveryConfig `yaml:",inline"` HTTPClientConfig config_util.HTTPClientConfig `yaml:",inline"` // The URL scheme to use when talking to Alertmanagers. Scheme string `yaml:"scheme,omitempty"` // Path prefix to add in front of the push endpoint path. PathPrefix string `yaml:"path_prefix,omitempty"` // The timeout used when sending alerts. Timeout model.Duration `yaml:"timeout,omitempty"` // The api version of Alertmanager. APIVersion AlertmanagerAPIVersion `yaml:"api_version"` // List of Alertmanager relabel configurations. RelabelConfigs []*relabel.Config `yaml:"relabel_configs,omitempty"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *AlertmanagerConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = DefaultAlertmanagerConfig type plain AlertmanagerConfig if err := unmarshal((*plain)(c)); err != nil { return err } // The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer. // We cannot make it a pointer as the parser panics for inlined pointer structs. // Thus we just do its validation here. 
if err := c.HTTPClientConfig.Validate(); err != nil { return err } // The UnmarshalYAML method of ServiceDiscoveryConfig is not being called because it's not a pointer. // We cannot make it a pointer as the parser panics for inlined pointer structs. // Thus we just do its validation here. if err := c.ServiceDiscoveryConfig.Validate(); err != nil { return err } // Check for users putting URLs in target groups. if len(c.RelabelConfigs) == 0 { for _, tg := range c.ServiceDiscoveryConfig.StaticConfigs { for _, t := range tg.Targets { if err := CheckTargetAddress(t[model.AddressLabel]); err != nil { return err } } } } for _, rlcfg := range c.RelabelConfigs { if rlcfg == nil { return errors.New("empty or null Alertmanager target relabeling rule") } } // Add index to the static config target groups for unique identification // within scrape pool. for i, tg := range c.ServiceDiscoveryConfig.StaticConfigs { tg.Source = fmt.Sprintf("%d", i) } return nil } // CheckTargetAddress checks if target address is valid. func CheckTargetAddress(address model.LabelValue) error { // For now check for a URL, we may want to expand this later. if strings.Contains(string(address), "/") { return errors.Errorf("%q is not a valid hostname", address) } return nil } // ClientCert contains client cert credentials. type ClientCert struct { Cert string `yaml:"cert"` Key config_util.Secret `yaml:"key"` } // FileSDConfig is the configuration for file based discovery. type FileSDConfig struct { Files []string `yaml:"files"` RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` } // RemoteWriteConfig is the configuration for writing to remote storage. type RemoteWriteConfig struct { URL *config_util.URL `yaml:"url"` RemoteTimeout model.Duration `yaml:"remote_timeout,omitempty"` WriteRelabelConfigs []*relabel.Config `yaml:"write_relabel_configs,omitempty"` Name string `yaml:"name,omitempty"` // We cannot do proper Go type embedding below as the parser will then parse // values arbitrarily into the overflow maps of further-down types. HTTPClientConfig config_util.HTTPClientConfig `yaml:",inline"` QueueConfig QueueConfig `yaml:"queue_config,omitempty"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *RemoteWriteConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = DefaultRemoteWriteConfig type plain RemoteWriteConfig if err := unmarshal((*plain)(c)); err != nil { return err } if c.URL == nil { return errors.New("url for remote_write is empty") } for _, rlcfg := range c.WriteRelabelConfigs { if rlcfg == nil { return errors.New("empty or null relabeling rule in remote write config") } } // The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer. // We cannot make it a pointer as the parser panics for inlined pointer structs. // Thus we just do its validation here. return c.HTTPClientConfig.Validate() } // QueueConfig is the configuration for the queue used to write to remote // storage. type QueueConfig struct { // Number of samples to buffer per shard before we block. Defaults to // MaxSamplesPerSend. Capacity int `yaml:"capacity,omitempty"` // Max number of shards, i.e. amount of concurrency. MaxShards int `yaml:"max_shards,omitempty"` // Min number of shards, i.e. amount of concurrency. MinShards int `yaml:"min_shards,omitempty"` // Maximum number of samples per send. MaxSamplesPerSend int `yaml:"max_samples_per_send,omitempty"` // Maximum time sample will wait in buffer. 
BatchSendDeadline model.Duration `yaml:"batch_send_deadline,omitempty"` // On recoverable errors, backoff exponentially. MinBackoff model.Duration `yaml:"min_backoff,omitempty"` MaxBackoff model.Duration `yaml:"max_backoff,omitempty"` } // RemoteReadConfig is the configuration for reading from remote storage. type RemoteReadConfig struct { URL *config_util.URL `yaml:"url"` RemoteTimeout model.Duration `yaml:"remote_timeout,omitempty"` ReadRecent bool `yaml:"read_recent,omitempty"` Name string `yaml:"name,omitempty"` // We cannot do proper Go type embedding below as the parser will then parse // values arbitrarily into the overflow maps of further-down types. HTTPClientConfig config_util.HTTPClientConfig `yaml:",inline"` // RequiredMatchers is an optional list of equality matchers which have to // be present in a selector to query the remote read endpoint. RequiredMatchers model.LabelSet `yaml:"required_matchers,omitempty"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *RemoteReadConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = DefaultRemoteReadConfig type plain RemoteReadConfig if err := unmarshal((*plain)(c)); err != nil { return err } if c.URL == nil { return errors.New("url for remote_read is empty") } // The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer. // We cannot make it a pointer as the parser panics for inlined pointer structs. // Thus we just do its validation here. return c.HTTPClientConfig.Validate() } prometheus-2.15.2+ds/config/config_default_test.go000066400000000000000000000015771360540074000222130ustar00rootroot00000000000000// Copyright 2017 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // +build !windows package config const ruleFilesConfigFile = "testdata/rules_abs_path.good.yml" var ruleFilesExpectedConf = &Config{ GlobalConfig: DefaultGlobalConfig, RuleFiles: []string{ "testdata/first.rules", "testdata/rules/second.rules", "/absolute/third.rules", }, original: "", } prometheus-2.15.2+ds/config/config_test.go000066400000000000000000000672521360540074000205110ustar00rootroot00000000000000// Copyright 2015 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package config import ( "encoding/json" "io/ioutil" "net/url" "path/filepath" "regexp" "strings" "testing" "time" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" "gopkg.in/yaml.v2" "github.com/prometheus/prometheus/discovery/azure" sd_config "github.com/prometheus/prometheus/discovery/config" "github.com/prometheus/prometheus/discovery/consul" "github.com/prometheus/prometheus/discovery/dns" "github.com/prometheus/prometheus/discovery/ec2" "github.com/prometheus/prometheus/discovery/file" "github.com/prometheus/prometheus/discovery/kubernetes" "github.com/prometheus/prometheus/discovery/marathon" "github.com/prometheus/prometheus/discovery/openstack" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/triton" "github.com/prometheus/prometheus/discovery/zookeeper" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/pkg/relabel" "github.com/prometheus/prometheus/util/testutil" ) func mustParseURL(u string) *config_util.URL { parsed, err := url.Parse(u) if err != nil { panic(err) } return &config_util.URL{URL: parsed} } var expectedConf = &Config{ GlobalConfig: GlobalConfig{ ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, EvaluationInterval: model.Duration(30 * time.Second), ExternalLabels: labels.Labels{ {Name: "foo", Value: "bar"}, {Name: "monitor", Value: "codelab"}, }, }, RuleFiles: []string{ filepath.FromSlash("testdata/first.rules"), filepath.FromSlash("testdata/my/*.rules"), }, RemoteWriteConfigs: []*RemoteWriteConfig{ { URL: mustParseURL("http://remote1/push"), RemoteTimeout: model.Duration(30 * time.Second), Name: "drop_expensive", WriteRelabelConfigs: []*relabel.Config{ { SourceLabels: model.LabelNames{"__name__"}, Separator: ";", Regex: relabel.MustNewRegexp("expensive.*"), Replacement: "$1", Action: relabel.Drop, }, }, QueueConfig: DefaultQueueConfig, }, { URL: mustParseURL("http://remote2/push"), RemoteTimeout: model.Duration(30 * time.Second), QueueConfig: DefaultQueueConfig, Name: "rw_tls", HTTPClientConfig: config_util.HTTPClientConfig{ TLSConfig: config_util.TLSConfig{ CertFile: filepath.FromSlash("testdata/valid_cert_file"), KeyFile: filepath.FromSlash("testdata/valid_key_file"), }, }, }, }, RemoteReadConfigs: []*RemoteReadConfig{ { URL: mustParseURL("http://remote1/read"), RemoteTimeout: model.Duration(1 * time.Minute), ReadRecent: true, Name: "default", }, { URL: mustParseURL("http://remote3/read"), RemoteTimeout: model.Duration(1 * time.Minute), ReadRecent: false, Name: "read_special", RequiredMatchers: model.LabelSet{"job": "special"}, HTTPClientConfig: config_util.HTTPClientConfig{ TLSConfig: config_util.TLSConfig{ CertFile: filepath.FromSlash("testdata/valid_cert_file"), KeyFile: filepath.FromSlash("testdata/valid_key_file"), }, }, }, }, ScrapeConfigs: []*ScrapeConfig{ { JobName: "prometheus", HonorLabels: true, HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, HTTPClientConfig: config_util.HTTPClientConfig{ BearerTokenFile: filepath.FromSlash("testdata/valid_token_file"), }, ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ StaticConfigs: []*targetgroup.Group{ { Targets: []model.LabelSet{ {model.AddressLabel: "localhost:9090"}, {model.AddressLabel: "localhost:9191"}, }, Labels: model.LabelSet{ "my": "label", "your": "label", }, Source: "0", }, 
}, FileSDConfigs: []*file.SDConfig{ { Files: []string{"testdata/foo/*.slow.json", "testdata/foo/*.slow.yml", "testdata/single/file.yml"}, RefreshInterval: model.Duration(10 * time.Minute), }, { Files: []string{"testdata/bar/*.yaml"}, RefreshInterval: model.Duration(5 * time.Minute), }, }, }, RelabelConfigs: []*relabel.Config{ { SourceLabels: model.LabelNames{"job", "__meta_dns_name"}, TargetLabel: "job", Separator: ";", Regex: relabel.MustNewRegexp("(.*)some-[regex]"), Replacement: "foo-${1}", Action: relabel.Replace, }, { SourceLabels: model.LabelNames{"abc"}, TargetLabel: "cde", Separator: ";", Regex: relabel.DefaultRelabelConfig.Regex, Replacement: relabel.DefaultRelabelConfig.Replacement, Action: relabel.Replace, }, { TargetLabel: "abc", Separator: ";", Regex: relabel.DefaultRelabelConfig.Regex, Replacement: "static", Action: relabel.Replace, }, { TargetLabel: "abc", Separator: ";", Regex: relabel.MustNewRegexp(""), Replacement: "static", Action: relabel.Replace, }, }, }, { JobName: "service-x", HonorTimestamps: true, ScrapeInterval: model.Duration(50 * time.Second), ScrapeTimeout: model.Duration(5 * time.Second), SampleLimit: 1000, HTTPClientConfig: config_util.HTTPClientConfig{ BasicAuth: &config_util.BasicAuth{ Username: "admin_name", Password: "multiline\nmysecret\ntest", }, }, MetricsPath: "/my_path", Scheme: "https", ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ DNSSDConfigs: []*dns.SDConfig{ { Names: []string{ "first.dns.address.domain.com", "second.dns.address.domain.com", }, RefreshInterval: model.Duration(15 * time.Second), Type: "SRV", }, { Names: []string{ "first.dns.address.domain.com", }, RefreshInterval: model.Duration(30 * time.Second), Type: "SRV", }, }, }, RelabelConfigs: []*relabel.Config{ { SourceLabels: model.LabelNames{"job"}, Regex: relabel.MustNewRegexp("(.*)some-[regex]"), Separator: ";", Replacement: relabel.DefaultRelabelConfig.Replacement, Action: relabel.Drop, }, { SourceLabels: model.LabelNames{"__address__"}, TargetLabel: "__tmp_hash", Regex: relabel.DefaultRelabelConfig.Regex, Replacement: relabel.DefaultRelabelConfig.Replacement, Modulus: 8, Separator: ";", Action: relabel.HashMod, }, { SourceLabels: model.LabelNames{"__tmp_hash"}, Regex: relabel.MustNewRegexp("1"), Separator: ";", Replacement: relabel.DefaultRelabelConfig.Replacement, Action: relabel.Keep, }, { Regex: relabel.MustNewRegexp("1"), Separator: ";", Replacement: relabel.DefaultRelabelConfig.Replacement, Action: relabel.LabelMap, }, { Regex: relabel.MustNewRegexp("d"), Separator: ";", Replacement: relabel.DefaultRelabelConfig.Replacement, Action: relabel.LabelDrop, }, { Regex: relabel.MustNewRegexp("k"), Separator: ";", Replacement: relabel.DefaultRelabelConfig.Replacement, Action: relabel.LabelKeep, }, }, MetricRelabelConfigs: []*relabel.Config{ { SourceLabels: model.LabelNames{"__name__"}, Regex: relabel.MustNewRegexp("expensive_metric.*"), Separator: ";", Replacement: relabel.DefaultRelabelConfig.Replacement, Action: relabel.Drop, }, }, }, { JobName: "service-y", HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ ConsulSDConfigs: []*consul.SDConfig{ { Server: "localhost:1234", Token: "mysecret", Services: []string{"nginx", "cache", "mysql"}, ServiceTags: []string{"canary", "v1"}, NodeMeta: map[string]string{"rack": "123"}, TagSeparator: 
consul.DefaultSDConfig.TagSeparator, Scheme: "https", RefreshInterval: consul.DefaultSDConfig.RefreshInterval, AllowStale: true, TLSConfig: config_util.TLSConfig{ CertFile: filepath.FromSlash("testdata/valid_cert_file"), KeyFile: filepath.FromSlash("testdata/valid_key_file"), CAFile: filepath.FromSlash("testdata/valid_ca_file"), InsecureSkipVerify: false, }, }, }, }, RelabelConfigs: []*relabel.Config{ { SourceLabels: model.LabelNames{"__meta_sd_consul_tags"}, Regex: relabel.MustNewRegexp("label:([^=]+)=([^,]+)"), Separator: ",", TargetLabel: "${1}", Replacement: "${2}", Action: relabel.Replace, }, }, }, { JobName: "service-z", HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: model.Duration(10 * time.Second), MetricsPath: "/metrics", Scheme: "http", HTTPClientConfig: config_util.HTTPClientConfig{ TLSConfig: config_util.TLSConfig{ CertFile: filepath.FromSlash("testdata/valid_cert_file"), KeyFile: filepath.FromSlash("testdata/valid_key_file"), }, BearerToken: "mysecret", }, }, { JobName: "service-kubernetes", HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ KubernetesSDConfigs: []*kubernetes.SDConfig{ { APIServer: kubernetesSDHostURL(), Role: kubernetes.RoleEndpoint, HTTPClientConfig: config_util.HTTPClientConfig{ BasicAuth: &config_util.BasicAuth{ Username: "myusername", Password: "mysecret", }, TLSConfig: config_util.TLSConfig{ CertFile: filepath.FromSlash("testdata/valid_cert_file"), KeyFile: filepath.FromSlash("testdata/valid_key_file"), }, }, NamespaceDiscovery: kubernetes.NamespaceDiscovery{}, }, }, }, }, { JobName: "service-kubernetes-namespaces", HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, HTTPClientConfig: config_util.HTTPClientConfig{ BasicAuth: &config_util.BasicAuth{ Username: "myusername", PasswordFile: filepath.FromSlash("testdata/valid_password_file"), }, }, ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ KubernetesSDConfigs: []*kubernetes.SDConfig{ { APIServer: kubernetesSDHostURL(), Role: kubernetes.RoleEndpoint, NamespaceDiscovery: kubernetes.NamespaceDiscovery{ Names: []string{ "default", }, }, }, }, }, }, { JobName: "service-marathon", HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ MarathonSDConfigs: []*marathon.SDConfig{ { Servers: []string{ "https://marathon.example.com:443", }, RefreshInterval: model.Duration(30 * time.Second), AuthToken: config_util.Secret("mysecret"), HTTPClientConfig: config_util.HTTPClientConfig{ TLSConfig: config_util.TLSConfig{ CertFile: filepath.FromSlash("testdata/valid_cert_file"), KeyFile: filepath.FromSlash("testdata/valid_key_file"), }, }, }, }, }, }, { JobName: "service-ec2", HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ EC2SDConfigs: []*ec2.SDConfig{ { Region: "us-east-1", AccessKey: "access", 
SecretKey: "mysecret", Profile: "profile", RefreshInterval: model.Duration(60 * time.Second), Port: 80, Filters: []*ec2.Filter{ { Name: "tag:environment", Values: []string{"prod"}, }, { Name: "tag:service", Values: []string{"web", "db"}, }, }, }, }, }, }, { JobName: "service-azure", HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ AzureSDConfigs: []*azure.SDConfig{ { Environment: "AzurePublicCloud", SubscriptionID: "11AAAA11-A11A-111A-A111-1111A1111A11", TenantID: "BBBB222B-B2B2-2B22-B222-2BB2222BB2B2", ClientID: "333333CC-3C33-3333-CCC3-33C3CCCCC33C", ClientSecret: "mysecret", AuthenticationMethod: "OAuth", RefreshInterval: model.Duration(5 * time.Minute), Port: 9100, }, }, }, }, { JobName: "service-nerve", HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ NerveSDConfigs: []*zookeeper.NerveSDConfig{ { Servers: []string{"localhost"}, Paths: []string{"/monitoring"}, Timeout: model.Duration(10 * time.Second), }, }, }, }, { JobName: "0123service-xxx", HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ StaticConfigs: []*targetgroup.Group{ { Targets: []model.LabelSet{ {model.AddressLabel: "localhost:9090"}, }, Source: "0", }, }, }, }, { JobName: "badfederation", HonorTimestamps: false, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: "/federate", Scheme: DefaultScrapeConfig.Scheme, ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ StaticConfigs: []*targetgroup.Group{ { Targets: []model.LabelSet{ {model.AddressLabel: "localhost:9090"}, }, Source: "0", }, }, }, }, { JobName: "測試", HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ StaticConfigs: []*targetgroup.Group{ { Targets: []model.LabelSet{ {model.AddressLabel: "localhost:9090"}, }, Source: "0", }, }, }, }, { JobName: "service-triton", HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ TritonSDConfigs: []*triton.SDConfig{ { Account: "testAccount", DNSSuffix: "triton.example.com", Endpoint: "triton.example.com", Port: 9163, RefreshInterval: model.Duration(60 * time.Second), Version: 1, TLSConfig: config_util.TLSConfig{ CertFile: "testdata/valid_cert_file", KeyFile: "testdata/valid_key_file", }, }, }, }, }, { JobName: "service-openstack", HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ OpenstackSDConfigs: 
[]*openstack.SDConfig{ { Role: "instance", Region: "RegionOne", Port: 80, RefreshInterval: model.Duration(60 * time.Second), TLSConfig: config_util.TLSConfig{ CAFile: "testdata/valid_ca_file", CertFile: "testdata/valid_cert_file", KeyFile: "testdata/valid_key_file", }, }, }, }, }, }, AlertingConfig: AlertingConfig{ AlertmanagerConfigs: []*AlertmanagerConfig{ { Scheme: "https", Timeout: model.Duration(10 * time.Second), APIVersion: AlertmanagerAPIVersionV1, ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ StaticConfigs: []*targetgroup.Group{ { Targets: []model.LabelSet{ {model.AddressLabel: "1.2.3.4:9093"}, {model.AddressLabel: "1.2.3.5:9093"}, {model.AddressLabel: "1.2.3.6:9093"}, }, Source: "0", }, }, }, }, }, }, original: "", } func TestLoadConfig(t *testing.T) { // Parse a valid file that sets a global scrape timeout. This tests whether parsing // an overwritten default field in the global config permanently changes the default. _, err := LoadFile("testdata/global_timeout.good.yml") testutil.Ok(t, err) c, err := LoadFile("testdata/conf.good.yml") testutil.Ok(t, err) expectedConf.original = c.original testutil.Equals(t, expectedConf, c) } func TestScrapeIntervalLarger(t *testing.T) { c, err := LoadFile("testdata/scrape_interval_larger.good.yml") testutil.Ok(t, err) testutil.Equals(t, 1, len(c.ScrapeConfigs)) for _, sc := range c.ScrapeConfigs { testutil.Equals(t, true, sc.ScrapeInterval >= sc.ScrapeTimeout) } } // YAML marshaling must not reveal authentication credentials. func TestElideSecrets(t *testing.T) { c, err := LoadFile("testdata/conf.good.yml") testutil.Ok(t, err) secretRe := regexp.MustCompile(`\\u003csecret\\u003e|`) config, err := yaml.Marshal(c) testutil.Ok(t, err) yamlConfig := string(config) matches := secretRe.FindAllStringIndex(yamlConfig, -1) testutil.Assert(t, len(matches) == 7, "wrong number of secret matches found") testutil.Assert(t, !strings.Contains(yamlConfig, "mysecret"), "yaml marshal reveals authentication credentials.") } func TestLoadConfigRuleFilesAbsolutePath(t *testing.T) { // Parse a valid file that sets a rule files with an absolute path c, err := LoadFile(ruleFilesConfigFile) testutil.Ok(t, err) ruleFilesExpectedConf.original = c.original testutil.Equals(t, ruleFilesExpectedConf, c) } func TestKubernetesEmptyAPIServer(t *testing.T) { _, err := LoadFile("testdata/kubernetes_empty_apiserver.good.yml") testutil.Ok(t, err) } var expectedErrors = []struct { filename string errMsg string }{ { filename: "jobname.bad.yml", errMsg: `job_name is empty`, }, { filename: "jobname_dup.bad.yml", errMsg: `found multiple scrape configs with job name "prometheus"`, }, { filename: "scrape_interval.bad.yml", errMsg: `scrape timeout greater than scrape interval`, }, { filename: "labelname.bad.yml", errMsg: `"not$allowed" is not a valid label name`, }, { filename: "labelname2.bad.yml", errMsg: `"not:allowed" is not a valid label name`, }, { filename: "labelvalue.bad.yml", errMsg: `"\xff" is not a valid label value`, }, { filename: "regex.bad.yml", errMsg: "error parsing regexp", }, { filename: "modulus_missing.bad.yml", errMsg: "relabel configuration for hashmod requires non-zero modulus", }, { filename: "labelkeep.bad.yml", errMsg: "labelkeep action requires only 'regex', and no other fields", }, { filename: "labelkeep2.bad.yml", errMsg: "labelkeep action requires only 'regex', and no other fields", }, { filename: "labelkeep3.bad.yml", errMsg: "labelkeep action requires only 'regex', and no other fields", }, { filename: "labelkeep4.bad.yml", errMsg: "labelkeep 
action requires only 'regex', and no other fields", }, { filename: "labelkeep5.bad.yml", errMsg: "labelkeep action requires only 'regex', and no other fields", }, { filename: "labeldrop.bad.yml", errMsg: "labeldrop action requires only 'regex', and no other fields", }, { filename: "labeldrop2.bad.yml", errMsg: "labeldrop action requires only 'regex', and no other fields", }, { filename: "labeldrop3.bad.yml", errMsg: "labeldrop action requires only 'regex', and no other fields", }, { filename: "labeldrop4.bad.yml", errMsg: "labeldrop action requires only 'regex', and no other fields", }, { filename: "labeldrop5.bad.yml", errMsg: "labeldrop action requires only 'regex', and no other fields", }, { filename: "labelmap.bad.yml", errMsg: "\"l-$1\" is invalid 'replacement' for labelmap action", }, { filename: "rules.bad.yml", errMsg: "invalid rule file path", }, { filename: "unknown_attr.bad.yml", errMsg: "field consult_sd_configs not found in type config.plain", }, { filename: "bearertoken.bad.yml", errMsg: "at most one of bearer_token & bearer_token_file must be configured", }, { filename: "bearertoken_basicauth.bad.yml", errMsg: "at most one of basic_auth, bearer_token & bearer_token_file must be configured", }, { filename: "kubernetes_http_config_without_api_server.bad.yml", errMsg: "to use custom HTTP client configuration please provide the 'api_server' URL explicitly", }, { filename: "kubernetes_bearertoken.bad.yml", errMsg: "at most one of bearer_token & bearer_token_file must be configured", }, { filename: "kubernetes_role.bad.yml", errMsg: "role", }, { filename: "kubernetes_namespace_discovery.bad.yml", errMsg: "field foo not found in type kubernetes.plain", }, { filename: "kubernetes_bearertoken_basicauth.bad.yml", errMsg: "at most one of basic_auth, bearer_token & bearer_token_file must be configured", }, { filename: "marathon_no_servers.bad.yml", errMsg: "marathon_sd: must contain at least one Marathon server", }, { filename: "marathon_authtoken_authtokenfile.bad.yml", errMsg: "marathon_sd: at most one of auth_token & auth_token_file must be configured", }, { filename: "marathon_authtoken_basicauth.bad.yml", errMsg: "marathon_sd: at most one of basic_auth, auth_token & auth_token_file must be configured", }, { filename: "marathon_authtoken_bearertoken.bad.yml", errMsg: "marathon_sd: at most one of bearer_token, bearer_token_file, auth_token & auth_token_file must be configured", }, { filename: "openstack_role.bad.yml", errMsg: "unknown OpenStack SD role", }, { filename: "url_in_targetgroup.bad.yml", errMsg: "\"http://bad\" is not a valid hostname", }, { filename: "target_label_missing.bad.yml", errMsg: "relabel configuration for replace action requires 'target_label' value", }, { filename: "target_label_hashmod_missing.bad.yml", errMsg: "relabel configuration for hashmod action requires 'target_label' value", }, { filename: "unknown_global_attr.bad.yml", errMsg: "field nonexistent_field not found in type config.plain", }, { filename: "remote_read_url_missing.bad.yml", errMsg: `url for remote_read is empty`, }, { filename: "remote_write_url_missing.bad.yml", errMsg: `url for remote_write is empty`, }, { filename: "remote_write_dup.bad.yml", errMsg: `found multiple remote write configs with job name "queue1"`, }, { filename: "remote_read_dup.bad.yml", errMsg: `found multiple remote read configs with job name "queue1"`, }, { filename: "ec2_filters_empty_values.bad.yml", errMsg: `EC2 SD configuration filter values cannot be empty`, }, { filename: "section_key_dup.bad.yml", errMsg: "field 
scrape_configs already set in type config.plain", }, { filename: "azure_client_id_missing.bad.yml", errMsg: "azure SD configuration requires a client_id", }, { filename: "azure_client_secret_missing.bad.yml", errMsg: "azure SD configuration requires a client_secret", }, { filename: "azure_subscription_id_missing.bad.yml", errMsg: "azure SD configuration requires a subscription_id", }, { filename: "azure_tenant_id_missing.bad.yml", errMsg: "azure SD configuration requires a tenant_id", }, { filename: "azure_authentication_method.bad.yml", errMsg: "unknown authentication_type \"invalid\". Supported types are \"OAuth\" or \"ManagedIdentity\"", }, { filename: "empty_scrape_config.bad.yml", errMsg: "empty or null scrape config section", }, { filename: "empty_rw_config.bad.yml", errMsg: "empty or null remote write config section", }, { filename: "empty_rr_config.bad.yml", errMsg: "empty or null remote read config section", }, { filename: "empty_target_relabel_config.bad.yml", errMsg: "empty or null target relabeling rule", }, { filename: "empty_metric_relabel_config.bad.yml", errMsg: "empty or null metric relabeling rule", }, { filename: "empty_alert_relabel_config.bad.yml", errMsg: "empty or null alert relabeling rule", }, { filename: "empty_alertmanager_relabel_config.bad.yml", errMsg: "empty or null Alertmanager target relabeling rule", }, { filename: "empty_rw_relabel_config.bad.yml", errMsg: "empty or null relabeling rule in remote write config", }, { filename: "empty_static_config.bad.yml", errMsg: "empty or null section in static_configs", }, } func TestBadConfigs(t *testing.T) { for _, ee := range expectedErrors { _, err := LoadFile("testdata/" + ee.filename) testutil.NotOk(t, err, "%s", ee.filename) testutil.Assert(t, strings.Contains(err.Error(), ee.errMsg), "Expected error for %s to contain %q but got: %s", ee.filename, ee.errMsg, err) } } func TestBadStaticConfigsJSON(t *testing.T) { content, err := ioutil.ReadFile("testdata/static_config.bad.json") testutil.Ok(t, err) var tg targetgroup.Group err = json.Unmarshal(content, &tg) testutil.NotOk(t, err) } func TestBadStaticConfigsYML(t *testing.T) { content, err := ioutil.ReadFile("testdata/static_config.bad.yml") testutil.Ok(t, err) var tg targetgroup.Group err = yaml.UnmarshalStrict(content, &tg) testutil.NotOk(t, err) } func TestEmptyConfig(t *testing.T) { c, err := Load("") testutil.Ok(t, err) exp := DefaultConfig testutil.Equals(t, exp, *c) } func TestEmptyGlobalBlock(t *testing.T) { c, err := Load("global:\n") testutil.Ok(t, err) exp := DefaultConfig exp.original = "global:\n" testutil.Equals(t, exp, *c) } func kubernetesSDHostURL() config_util.URL { tURL, _ := url.Parse("https://localhost:1234") return config_util.URL{URL: tURL} } prometheus-2.15.2+ds/config/config_windows_test.go000066400000000000000000000015721360540074000222540ustar00rootroot00000000000000// Copyright 2017 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package config const ruleFilesConfigFile = "testdata/rules_abs_path_windows.good.yml" var ruleFilesExpectedConf = &Config{ GlobalConfig: DefaultGlobalConfig, RuleFiles: []string{ "testdata\\first.rules", "testdata\\rules\\second.rules", "c:\\absolute\\third.rules", }, original: "", } prometheus-2.15.2+ds/config/testdata/000077500000000000000000000000001360540074000174535ustar00rootroot00000000000000prometheus-2.15.2+ds/config/testdata/azure_authentication_method.bad.yml000066400000000000000000000002011360540074000265010ustar00rootroot00000000000000scrape_configs: - azure_sd_configs: - authentication_method: invalid subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11 prometheus-2.15.2+ds/config/testdata/azure_client_id_missing.bad.yml000066400000000000000000000003421360540074000256130ustar00rootroot00000000000000scrape_configs: - job_name: azure azure_sd_configs: - subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11 tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2 client_id: client_secret: mysecretprometheus-2.15.2+ds/config/testdata/azure_client_secret_missing.bad.yml000066400000000000000000000003761360540074000265130ustar00rootroot00000000000000scrape_configs: - job_name: azure azure_sd_configs: - subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11 tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2 client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C client_secret:prometheus-2.15.2+ds/config/testdata/azure_subscription_id_missing.bad.yml000066400000000000000000000003421360540074000270610ustar00rootroot00000000000000scrape_configs: - job_name: azure azure_sd_configs: - subscription_id: tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2 client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C client_secret: mysecretprometheus-2.15.2+ds/config/testdata/azure_tenant_id_missing.bad.yml000066400000000000000000000003421360540074000256260ustar00rootroot00000000000000scrape_configs: - job_name: azure azure_sd_configs: - subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11 tenant_id: client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C client_secret: mysecretprometheus-2.15.2+ds/config/testdata/bearertoken.bad.yml000066400000000000000000000001421360540074000232210ustar00rootroot00000000000000scrape_configs: - job_name: prometheus bearer_token: 1234 bearer_token_file: somefile prometheus-2.15.2+ds/config/testdata/bearertoken_basicauth.bad.yml000066400000000000000000000002001360540074000252370ustar00rootroot00000000000000scrape_configs: - job_name: prometheus bearer_token: 1234 basic_auth: username: user password: password prometheus-2.15.2+ds/config/testdata/conf.good.yml000066400000000000000000000134641360540074000220620ustar00rootroot00000000000000# my global config global: scrape_interval: 15s evaluation_interval: 30s # scrape_timeout is set to the global default (10s). external_labels: monitor: codelab foo: bar rule_files: - "first.rules" - "my/*.rules" remote_write: - url: http://remote1/push name: drop_expensive write_relabel_configs: - source_labels: [__name__] regex: expensive.* action: drop - url: http://remote2/push name: rw_tls tls_config: cert_file: valid_cert_file key_file: valid_key_file remote_read: - url: http://remote1/read read_recent: true name: default - url: http://remote3/read read_recent: false name: read_special required_matchers: job: special tls_config: cert_file: valid_cert_file key_file: valid_key_file scrape_configs: - job_name: prometheus honor_labels: true # scrape_interval is defined by the configured global (15s). # scrape_timeout is defined by the global default (10s). 
# metrics_path defaults to '/metrics' # scheme defaults to 'http'. file_sd_configs: - files: - foo/*.slow.json - foo/*.slow.yml - single/file.yml refresh_interval: 10m - files: - bar/*.yaml static_configs: - targets: ['localhost:9090', 'localhost:9191'] labels: my: label your: label relabel_configs: - source_labels: [job, __meta_dns_name] regex: (.*)some-[regex] target_label: job replacement: foo-${1} # action defaults to 'replace' - source_labels: [abc] target_label: cde - replacement: static target_label: abc - regex: replacement: static target_label: abc bearer_token_file: valid_token_file - job_name: service-x basic_auth: username: admin_name password: "multiline\nmysecret\ntest" scrape_interval: 50s scrape_timeout: 5s sample_limit: 1000 metrics_path: /my_path scheme: https dns_sd_configs: - refresh_interval: 15s names: - first.dns.address.domain.com - second.dns.address.domain.com - names: - first.dns.address.domain.com # refresh_interval defaults to 30s. relabel_configs: - source_labels: [job] regex: (.*)some-[regex] action: drop - source_labels: [__address__] modulus: 8 target_label: __tmp_hash action: hashmod - source_labels: [__tmp_hash] regex: 1 action: keep - action: labelmap regex: 1 - action: labeldrop regex: d - action: labelkeep regex: k metric_relabel_configs: - source_labels: [__name__] regex: expensive_metric.* action: drop - job_name: service-y consul_sd_configs: - server: 'localhost:1234' token: mysecret services: ['nginx', 'cache', 'mysql'] tags: ["canary", "v1"] node_meta: rack: "123" allow_stale: true scheme: https tls_config: ca_file: valid_ca_file cert_file: valid_cert_file key_file: valid_key_file insecure_skip_verify: false relabel_configs: - source_labels: [__meta_sd_consul_tags] separator: ',' regex: label:([^=]+)=([^,]+) target_label: ${1} replacement: ${2} - job_name: service-z tls_config: cert_file: valid_cert_file key_file: valid_key_file bearer_token: mysecret - job_name: service-kubernetes kubernetes_sd_configs: - role: endpoints api_server: 'https://localhost:1234' tls_config: cert_file: valid_cert_file key_file: valid_key_file basic_auth: username: 'myusername' password: 'mysecret' - job_name: service-kubernetes-namespaces kubernetes_sd_configs: - role: endpoints api_server: 'https://localhost:1234' namespaces: names: - default basic_auth: username: 'myusername' password_file: valid_password_file - job_name: service-marathon marathon_sd_configs: - servers: - 'https://marathon.example.com:443' auth_token: "mysecret" tls_config: cert_file: valid_cert_file key_file: valid_key_file - job_name: service-ec2 ec2_sd_configs: - region: us-east-1 access_key: access secret_key: mysecret profile: profile filters: - name: tag:environment values: - prod - name: tag:service values: - web - db - job_name: service-azure azure_sd_configs: - environment: AzurePublicCloud authentication_method: OAuth subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11 tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2 client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C client_secret: mysecret port: 9100 - job_name: service-nerve nerve_sd_configs: - servers: - localhost paths: - /monitoring - job_name: 0123service-xxx metrics_path: /metrics static_configs: - targets: - localhost:9090 - job_name: badfederation honor_timestamps: false metrics_path: /federate static_configs: - targets: - localhost:9090 - job_name: 測試 metrics_path: /metrics static_configs: - targets: - localhost:9090 - job_name: service-triton triton_sd_configs: - account: 'testAccount' dns_suffix: 'triton.example.com' endpoint: 
'triton.example.com' port: 9163 refresh_interval: 1m version: 1 tls_config: cert_file: valid_cert_file key_file: valid_key_file - job_name: service-openstack openstack_sd_configs: - role: instance region: RegionOne port: 80 refresh_interval: 1m tls_config: ca_file: valid_ca_file cert_file: valid_cert_file key_file: valid_key_file alerting: alertmanagers: - scheme: https static_configs: - targets: - "1.2.3.4:9093" - "1.2.3.5:9093" - "1.2.3.6:9093" prometheus-2.15.2+ds/config/testdata/ec2_filters_empty_values.bad.yml000066400000000000000000000002301360540074000257140ustar00rootroot00000000000000scrape_configs: - job_name: prometheus ec2_sd_configs: - region: 'us-east-1' filters: - name: 'tag:environment' values: prometheus-2.15.2+ds/config/testdata/empty_alert_relabel_config.bad.yml000066400000000000000000000000471360540074000262640ustar00rootroot00000000000000alerting: alert_relabel_configs: - prometheus-2.15.2+ds/config/testdata/empty_alertmanager_relabel_config.bad.yml000066400000000000000000000000661360540074000276200ustar00rootroot00000000000000alerting: alertmanagers: - relabel_configs: - prometheus-2.15.2+ds/config/testdata/empty_metric_relabel_config.bad.yml000066400000000000000000000001011360540074000264270ustar00rootroot00000000000000scrape_configs: - job_name: "test" metric_relabel_configs: - prometheus-2.15.2+ds/config/testdata/empty_rr_config.bad.yml000066400000000000000000000000171360540074000241070ustar00rootroot00000000000000remote_read: - prometheus-2.15.2+ds/config/testdata/empty_rw_config.bad.yml000066400000000000000000000000201360540074000241060ustar00rootroot00000000000000remote_write: - prometheus-2.15.2+ds/config/testdata/empty_rw_relabel_config.bad.yml000066400000000000000000000000771360540074000256100ustar00rootroot00000000000000remote_write: - url: "foo" write_relabel_configs: -prometheus-2.15.2+ds/config/testdata/empty_scrape_config.bad.yml000066400000000000000000000000211360540074000247340ustar00rootroot00000000000000scrape_configs: -prometheus-2.15.2+ds/config/testdata/empty_static_config.bad.yml000066400000000000000000000000711360540074000247530ustar00rootroot00000000000000scrape_configs: - job_name: "test" static_configs: - prometheus-2.15.2+ds/config/testdata/empty_target_relabel_config.bad.yml000066400000000000000000000000721360540074000264410ustar00rootroot00000000000000scrape_configs: - job_name: "test" relabel_configs: - prometheus-2.15.2+ds/config/testdata/first.rules000066400000000000000000000003251360540074000216560ustar00rootroot00000000000000groups: - name: my-group-name rules: - alert: InstanceDown expr: up == 0 for: 1m labels: severity: critical annotations: description: "stuff's happening with {{ $labels.service }}" prometheus-2.15.2+ds/config/testdata/global_timeout.good.yml000066400000000000000000000000631360540074000241320ustar00rootroot00000000000000global: scrape_timeout: 1h scrape_interval: 1h prometheus-2.15.2+ds/config/testdata/jobname.bad.yml000066400000000000000000000000361360540074000223350ustar00rootroot00000000000000scrape_configs: - job_name: prometheus-2.15.2+ds/config/testdata/jobname_dup.bad.yml000066400000000000000000000002301360540074000232010ustar00rootroot00000000000000# Two scrape configs with the same job names are not allowed. 
scrape_configs: - job_name: prometheus - job_name: service-x - job_name: prometheus prometheus-2.15.2+ds/config/testdata/kubernetes_bearertoken.bad.yml000066400000000000000000000002761360540074000254600ustar00rootroot00000000000000scrape_configs: - job_name: prometheus kubernetes_sd_configs: - role: node api_server: 'https://localhost:1234' bearer_token: 1234 bearer_token_file: somefile prometheus-2.15.2+ds/config/testdata/kubernetes_bearertoken_basicauth.bad.yml000066400000000000000000000003371360540074000275010ustar00rootroot00000000000000scrape_configs: - job_name: prometheus kubernetes_sd_configs: - role: pod api_server: 'https://localhost:1234' bearer_token: 1234 basic_auth: username: user password: password prometheus-2.15.2+ds/config/testdata/kubernetes_empty_apiserver.good.yml000066400000000000000000000001241360540074000265670ustar00rootroot00000000000000scrape_configs: - job_name: prometheus kubernetes_sd_configs: - role: endpoints prometheus-2.15.2+ds/config/testdata/kubernetes_http_config_without_api_server.bad.yml000066400000000000000000000001551360540074000314610ustar00rootroot00000000000000scrape_configs: - job_name: prometheus kubernetes_sd_configs: - role: pod bearer_token: 1234 prometheus-2.15.2+ds/config/testdata/kubernetes_namespace_discovery.bad.yml000066400000000000000000000001731360540074000271760ustar00rootroot00000000000000scrape_configs: - kubernetes_sd_configs: - api_server: kubernetes:443 role: endpoints namespaces: foo: bar prometheus-2.15.2+ds/config/testdata/kubernetes_role.bad.yml000066400000000000000000000001341360540074000241110ustar00rootroot00000000000000scrape_configs: - kubernetes_sd_configs: - api_server: kubernetes:443 role: vacation prometheus-2.15.2+ds/config/testdata/labeldrop.bad.yml000066400000000000000000000001701360540074000226650ustar00rootroot00000000000000scrape_configs: - job_name: prometheus relabel_configs: - source_labels: [abcdef] action: labeldrop prometheus-2.15.2+ds/config/testdata/labeldrop2.bad.yml000066400000000000000000000001541360540074000227510ustar00rootroot00000000000000scrape_configs: - job_name: prometheus relabel_configs: - modulus: 8 action: labeldrop prometheus-2.15.2+ds/config/testdata/labeldrop3.bad.yml000066400000000000000000000001601360540074000227470ustar00rootroot00000000000000scrape_configs: - job_name: prometheus relabel_configs: - separator: ',' action: labeldrop prometheus-2.15.2+ds/config/testdata/labeldrop4.bad.yml000066400000000000000000000001671360540074000227570ustar00rootroot00000000000000scrape_configs: - job_name: prometheus relabel_configs: - replacement: yolo-{1} action: labeldrop prometheus-2.15.2+ds/config/testdata/labeldrop5.bad.yml000066400000000000000000000001631360540074000227540ustar00rootroot00000000000000scrape_configs: - job_name: prometheus relabel_configs: - target_label: yolo action: labeldrop prometheus-2.15.2+ds/config/testdata/labelkeep.bad.yml000066400000000000000000000001701360540074000226450ustar00rootroot00000000000000scrape_configs: - job_name: prometheus relabel_configs: - source_labels: [abcdef] action: labelkeep prometheus-2.15.2+ds/config/testdata/labelkeep2.bad.yml000066400000000000000000000001541360540074000227310ustar00rootroot00000000000000scrape_configs: - job_name: prometheus relabel_configs: - modulus: 8 action: labelkeep prometheus-2.15.2+ds/config/testdata/labelkeep3.bad.yml000066400000000000000000000001601360540074000227270ustar00rootroot00000000000000scrape_configs: - job_name: prometheus relabel_configs: - separator: ',' action: labelkeep 
prometheus-2.15.2+ds/config/testdata/labelkeep4.bad.yml000066400000000000000000000001671360540074000227370ustar00rootroot00000000000000scrape_configs: - job_name: prometheus relabel_configs: - replacement: yolo-{1} action: labelkeep prometheus-2.15.2+ds/config/testdata/labelkeep5.bad.yml000066400000000000000000000001631360540074000227340ustar00rootroot00000000000000scrape_configs: - job_name: prometheus relabel_configs: - target_label: yolo action: labelkeep prometheus-2.15.2+ds/config/testdata/labelmap.bad.yml000066400000000000000000000001611360540074000224760ustar00rootroot00000000000000scrape_configs: - job_name: prometheus relabel_configs: - action: labelmap replacement: l-$1 prometheus-2.15.2+ds/config/testdata/labelname.bad.yml000066400000000000000000000000621360540074000226410ustar00rootroot00000000000000global: external_labels: not$allowed: value prometheus-2.15.2+ds/config/testdata/labelname2.bad.yml000066400000000000000000000000641360540074000227250ustar00rootroot00000000000000global: external_labels: 'not:allowed': value prometheus-2.15.2+ds/config/testdata/labelvalue.bad.yml000066400000000000000000000000641360540074000230370ustar00rootroot00000000000000global: external_labels: name: !!binary "/w=="prometheus-2.15.2+ds/config/testdata/marathon_authtoken_authtokenfile.bad.yml000066400000000000000000000002531360540074000275400ustar00rootroot00000000000000scrape_configs: - job_name: prometheus marathon_sd_configs: - servers: - 'https://localhost:1234' auth_token: 1234 auth_token_file: somefile prometheus-2.15.2+ds/config/testdata/marathon_authtoken_basicauth.bad.yml000066400000000000000000000003171360540074000266420ustar00rootroot00000000000000scrape_configs: - job_name: prometheus marathon_sd_configs: - servers: - 'https://localhost:1234' auth_token: 1234 basic_auth: username: user password: password prometheus-2.15.2+ds/config/testdata/marathon_authtoken_bearertoken.bad.yml000066400000000000000000000002441360540074000271770ustar00rootroot00000000000000scrape_configs: - job_name: prometheus marathon_sd_configs: - servers: - 'https://localhost:1234' auth_token: 1234 bearer_token: 4567 prometheus-2.15.2+ds/config/testdata/marathon_no_servers.bad.yml000066400000000000000000000002441360540074000250010ustar00rootroot00000000000000# my global config global: scrape_interval: 15s evaluation_interval: 30s scrape_configs: - job_name: service-marathon marathon_sd_configs: - servers: prometheus-2.15.2+ds/config/testdata/modulus_missing.bad.yml000066400000000000000000000001541360540074000241440ustar00rootroot00000000000000scrape_configs: - job_name: prometheus relabel_configs: - regex: abcdef action: hashmod prometheus-2.15.2+ds/config/testdata/openstack_role.bad.yml000066400000000000000000000000731360540074000237330ustar00rootroot00000000000000scrape_configs: - openstack_sd_configs: - role: invalid prometheus-2.15.2+ds/config/testdata/regex.bad.yml000066400000000000000000000001251360540074000220330ustar00rootroot00000000000000scrape_configs: - job_name: prometheus relabel_configs: - regex: abc(def prometheus-2.15.2+ds/config/testdata/remote_read_dup.bad.yml000066400000000000000000000001461360540074000240620ustar00rootroot00000000000000remote_read: - url: http://localhost:9090 name: queue1 - url: localhost:9091 name: queue1 prometheus-2.15.2+ds/config/testdata/remote_read_url_missing.bad.yml000066400000000000000000000000261360540074000256220ustar00rootroot00000000000000remote_read: - url: 
prometheus-2.15.2+ds/config/testdata/remote_write_dup.bad.yml000066400000000000000000000001411360540074000242740ustar00rootroot00000000000000remote_write: - url: localhost:9090 name: queue1 - url: localhost:9091 name: queue1 prometheus-2.15.2+ds/config/testdata/remote_write_url_missing.bad.yml000066400000000000000000000000271360540074000260420ustar00rootroot00000000000000remote_write: - url: prometheus-2.15.2+ds/config/testdata/rules.bad.yml000066400000000000000000000000671360540074000220600ustar00rootroot00000000000000rule_files: - 'my_rule' # fine - 'my/*/rule' # bad prometheus-2.15.2+ds/config/testdata/rules_abs_path.good.yml000066400000000000000000000001311360540074000241130ustar00rootroot00000000000000rule_files: - 'first.rules' - 'rules/second.rules' - '/absolute/third.rules' prometheus-2.15.2+ds/config/testdata/rules_abs_path_windows.good.yml000066400000000000000000000001331360540074000256670ustar00rootroot00000000000000rule_files: - 'first.rules' - 'rules\second.rules' - 'c:\absolute\third.rules' prometheus-2.15.2+ds/config/testdata/scrape_interval.bad.yml000066400000000000000000000001231360540074000241000ustar00rootroot00000000000000scrape_configs: - job_name: prometheus scrape_interval: 5s scrape_timeout: 6s prometheus-2.15.2+ds/config/testdata/scrape_interval_larger.good.yml000066400000000000000000000004731360540074000256460ustar00rootroot00000000000000global: scrape_interval: 15s scrape_timeout: 15s scrape_configs: - job_name: prometheus scrape_interval: 5s dns_sd_configs: - refresh_interval: 15s names: - first.dns.address.domain.com - second.dns.address.domain.com - names: - first.dns.address.domain.comprometheus-2.15.2+ds/config/testdata/section_key_dup.bad.yml000066400000000000000000000001361360540074000241070ustar00rootroot00000000000000scrape_configs: - job_name: 'prometheus_system' scrape_configs: - job_name: 'foo_system' prometheus-2.15.2+ds/config/testdata/static_config.bad.json000066400000000000000000000001741360540074000237110ustar00rootroot00000000000000{ "targets": ["1.2.3.4:9100"], "labels": { "some_valid_label": "foo", "oops:this-label-is-invalid": "bar" } } prometheus-2.15.2+ds/config/testdata/static_config.bad.yml000066400000000000000000000001341360540074000235350ustar00rootroot00000000000000targets: ['1.2.3.4:9001', '1.2.3.5:9090'] labels: valid_label: foo not:valid_label: bar prometheus-2.15.2+ds/config/testdata/target_label_hashmod_missing.bad.yml000066400000000000000000000002331360540074000266020ustar00rootroot00000000000000scrape_configs: - job_name: prometheus relabel_configs: - source_labels: [__address__] modulus: 8 action: hashmod prometheus-2.15.2+ds/config/testdata/target_label_missing.bad.yml000066400000000000000000000001241360540074000250760ustar00rootroot00000000000000scrape_configs: - job_name: prometheus relabel_configs: - regex: abcdef prometheus-2.15.2+ds/config/testdata/unknown_attr.bad.yml000066400000000000000000000005531360540074000234570ustar00rootroot00000000000000# my global config global: scrape_interval: 15s evaluation_interval: 30s # scrape_timeout is set to the global default (10s). 
external_labels: monitor: codelab foo: bar rule_files: - "first.rules" - "second.rules" - "my/*.rules" scrape_configs: - job_name: prometheus consult_sd_configs: - server: 'localhost:1234' prometheus-2.15.2+ds/config/testdata/unknown_global_attr.bad.yml000066400000000000000000000000421360540074000247700ustar00rootroot00000000000000global: nonexistent_field: test prometheus-2.15.2+ds/config/testdata/url_in_targetgroup.bad.yml000066400000000000000000000001271360540074000246360ustar00rootroot00000000000000scrape_configs: - job_name: prometheus static_configs: - targets: - http://bad prometheus-2.15.2+ds/console_libraries/000077500000000000000000000000001360540074000200735ustar00rootroot00000000000000prometheus-2.15.2+ds/console_libraries/menu.lib000066400000000000000000000055101360540074000215300ustar00rootroot00000000000000{{/* vim: set ft=html: */}} {{/* Navbar, should be passed . */}} {{ define "navbar" }} {{ end }} {{/* LHS menu, should be passed . */}} {{ define "menu" }}
{{ end }} {{/* Helper, pass (args . path name) */}} {{ define "_menuItem" }} {{ end }} prometheus-2.15.2+ds/console_libraries/prom.lib000066400000000000000000000136601360540074000215460ustar00rootroot00000000000000{{/* vim: set ft=html: */}} {{/* Load Prometheus console library JS/CSS. Should go in */}} {{ define "prom_console_head" }} {{ end }} {{/* Top of all pages. */}} {{ define "head" -}} {{ template "prom_console_head" }} {{ template "navbar" . }} {{ template "menu" . }} {{ end }} {{ define "__prom_query_drilldown_noop" }}{{ . }}{{ end }} {{ define "humanize" }}{{ humanize . }}{{ end }} {{ define "humanizeNoSmallPrefix" }}{{ if and (lt . 1.0) (gt . -1.0) }}{{ printf "%.3g" . }}{{ else }}{{ humanize . }}{{ end }}{{ end }} {{ define "humanize1024" }}{{ humanize1024 . }}{{ end }} {{ define "humanizeDuration" }}{{ humanizeDuration . }}{{ end }} {{ define "humanizePercentage" }}{{ humanizePercentage . }}{{ end }} {{ define "humanizeTimestamp" }}{{ humanizeTimestamp . }}{{ end }} {{ define "printf.1f" }}{{ printf "%.1f" . }}{{ end }} {{ define "printf.3g" }}{{ printf "%.3g" . }}{{ end }} {{/* prom_query_drilldown (args expr suffix? renderTemplate?) Displays the result of the expression, with a link to /graph for it. renderTemplate is the name of the template to use to render the value. */}} {{ define "prom_query_drilldown" }} {{ $expr := .arg0 }}{{ $suffix := (or .arg1 "") }}{{ $renderTemplate := (or .arg2 "__prom_query_drilldown_noop") }} {{ with query $expr }}{{tmpl $renderTemplate ( . | first | value )}}{{ $suffix }}{{ else }}-{{ end }} {{ end }} {{ define "prom_path" }}/consoles/{{ .Path }}?{{ range $param, $value := .Params }}{{ $param }}={{ $value }}&{{ end }}{{ end }}" {{ define "prom_right_table_head" }}
{{ end }} {{ define "prom_right_table_tail" }}
{{ end }} {{/* RHS table head, pass job name. Should be used after prom_right_table_head. */}} {{ define "prom_right_table_job_head" }} {{ . }} {{ template "prom_query_drilldown" (args (printf "sum(up{job='%s'})" .)) }} / {{ template "prom_query_drilldown" (args (printf "count(up{job='%s'})" .)) }} CPU {{ template "prom_query_drilldown" (args (printf "avg by(job)(irate(process_cpu_seconds_total{job='%s'}[5m]))" .) "s/s" "humanizeNoSmallPrefix") }} Memory {{ template "prom_query_drilldown" (args (printf "avg by(job)(process_resident_memory_bytes{job='%s'})" .) "B" "humanize1024") }} {{ end }} {{ define "prom_content_head" }}
{{ template "prom_graph_timecontrol" . }} {{ end }} {{ define "prom_content_tail" }}
{{ end }} {{ define "prom_graph_timecontrol" }}
{{ end }} {{/* Bottom of all pages. */}} {{ define "tail" }} {{ end }} prometheus-2.15.2+ds/consoles/000077500000000000000000000000001360540074000162225ustar00rootroot00000000000000prometheus-2.15.2+ds/consoles/index.html.example000066400000000000000000000011501360540074000216460ustar00rootroot00000000000000{{ template "head" . }} {{ template "prom_right_table_head" }} {{ template "prom_right_table_tail" }} {{ template "prom_content_head" . }}

<h1>Overview</h1>
<p>These are example consoles for Prometheus.</p>

<p>These consoles expect exporters to have the following job labels:</p>

<table>
  <tr>
    <th>Exporter</th>
    <th>Job label</th>
  </tr>
  <tr>
    <td>Node Exporter</td>
    <td>node</td>
  </tr>
  <tr>
    <td>Prometheus</td>
    <td>prometheus</td>
  </tr>
</table>
{{ template "prom_content_tail" . }} {{ template "tail" }} prometheus-2.15.2+ds/consoles/node-cpu.html000066400000000000000000000051631360540074000206270ustar00rootroot00000000000000{{ template "head" . }} {{ template "prom_right_table_head" }} CPU(s): {{ template "prom_query_drilldown" (args (printf "scalar(count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'})))" .Params.instance)) }} {{ range printf "sum by (mode)(irate(node_cpu_seconds_total{job='node',instance='%s'}[5m])) * 100 / scalar(count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'})))" .Params.instance .Params.instance | query | sortByLabel "mode" }} {{ .Labels.mode | title }} CPU {{ .Value | printf "%.1f" }}% {{ end }} Misc Processes Running {{ template "prom_query_drilldown" (args (printf "node_procs_running{job='node',instance='%s'}" .Params.instance) "" "humanize") }} Processes Blocked {{ template "prom_query_drilldown" (args (printf "node_procs_blocked{job='node',instance='%s'}" .Params.instance) "" "humanize") }} Forks {{ template "prom_query_drilldown" (args (printf "irate(node_forks_total{job='node',instance='%s'}[5m])" .Params.instance) "/s" "humanize") }} Context Switches {{ template "prom_query_drilldown" (args (printf "irate(node_context_switches_total{job='node',instance='%s'}[5m])" .Params.instance) "/s" "humanize") }} Interrupts {{ template "prom_query_drilldown" (args (printf "irate(node_intr_total{job='node',instance='%s'}[5m])" .Params.instance) "/s" "humanize") }} 1m Loadavg {{ template "prom_query_drilldown" (args (printf "node_load1{job='node',instance='%s'}" .Params.instance)) }} {{ template "prom_right_table_tail" }} {{ template "prom_content_head" . }}

<h1>Node CPU - {{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}</h1>

<h3>CPU Usage</h3>

{{ template "prom_content_tail" . }} {{ template "tail" }} prometheus-2.15.2+ds/consoles/node-disk.html000066400000000000000000000066711360540074000207770ustar00rootroot00000000000000{{ template "head" . }} {{ template "prom_content_head" . }}

<h1>Node Disk - {{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}</h1>

<h3>Disk I/O Utilization</h3>

<h3>Filesystem Usage</h3>

{{ template "prom_right_table_head" }} Disks {{ range printf "node_disk_io_time_seconds_total{job='node',instance='%s'}" .Params.instance | query | sortByLabel "device" }} {{ .Labels.device }} Utilization {{ template "prom_query_drilldown" (args (printf "irate(node_disk_io_time_seconds_total{job='node',instance='%s',device='%s'}[5m]) * 100" .Labels.instance .Labels.device) "%" "printf.1f") }} Throughput {{ template "prom_query_drilldown" (args (printf "irate(node_disk_read_bytes_total{job='node',instance='%s',device='%s'}[5m]) + irate(node_disk_written_bytes_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device .Labels.instance .Labels.device) "B/s" "humanize") }} Avg Read Time {{ template "prom_query_drilldown" (args (printf "irate(node_disk_read_time_seconds_total{job='node',instance='%s',device='%s'}[5m]) / irate(node_disk_reads_completed_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device .Labels.instance .Labels.device) "s" "humanize") }} Avg Write Time {{ template "prom_query_drilldown" (args (printf "irate(node_disk_write_time_seconds_total{job='node',instance='%s',device='%s'}[5m]) / irate(node_disk_writes_completed_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device .Labels.instance .Labels.device) "s" "humanize") }} {{ end }} Filesystem Fullness {{ define "roughlyNearZero" }} {{ if gt .1 . }}~0{{ else }}{{ printf "%.1f" . }}{{ end }} {{ end }} {{ range printf "node_filesystem_size_bytes{job='node',instance='%s'}" .Params.instance | query | sortByLabel "mountpoint" }} {{ .Labels.mountpoint }} {{ template "prom_query_drilldown" (args (printf "100 - node_filesystem_avail_bytes{job='node',instance='%s',mountpoint='%s'} / node_filesystem_size_bytes{job='node'} * 100" .Labels.instance .Labels.mountpoint) "%" "roughlyNearZero") }} {{ end }} {{ template "prom_right_table_tail" }} {{ template "prom_content_tail" . }} {{ template "tail" }} prometheus-2.15.2+ds/consoles/node-overview.html000066400000000000000000000132421360540074000217030ustar00rootroot00000000000000{{ template "head" . }} {{ template "prom_content_head" . }}

Node Overview - {{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}

CPU Usage

Disk I/O Utilization

Memory

{{ template "prom_right_table_head" }} Overview User CPU {{ template "prom_query_drilldown" (args (printf "sum(irate(node_cpu_seconds_total{job='node',instance='%s',mode='user'}[5m])) * 100 / count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'}))" .Params.instance .Params.instance) "%" "printf.1f") }} System CPU {{ template "prom_query_drilldown" (args (printf "sum(irate(node_cpu_seconds_total{job='node',instance='%s',mode='system'}[5m])) * 100 / count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'}))" .Params.instance .Params.instance) "%" "printf.1f") }} Memory Total {{ template "prom_query_drilldown" (args (printf "node_memory_MemTotal_bytes{job='node',instance='%s'}" .Params.instance) "B" "humanize1024") }} Memory Free {{ template "prom_query_drilldown" (args (printf "node_memory_MemFree_bytes{job='node',instance='%s'}" .Params.instance) "B" "humanize1024") }} Network {{ range printf "node_network_receive_bytes_total{job='node',instance='%s',device!='lo'}" .Params.instance | query | sortByLabel "device" }} {{ .Labels.device }} Received {{ template "prom_query_drilldown" (args (printf "irate(node_network_receive_bytes_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device) "B/s" "humanize") }} {{ .Labels.device }} Transmitted {{ template "prom_query_drilldown" (args (printf "irate(node_network_transmit_bytes_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device) "B/s" "humanize") }} {{ end }} Disks {{ range printf "node_disk_io_time_seconds_total{job='node',instance='%s',device!~'^(md\\\\d+$|dm-)'}" .Params.instance | query | sortByLabel "device" }} {{ .Labels.device }} Utilization {{ template "prom_query_drilldown" (args (printf "irate(node_disk_io_time_seconds_total{job='node',instance='%s',device='%s'}[5m]) * 100" .Labels.instance .Labels.device) "%" "printf.1f") }} {{ end }} {{ range printf "node_disk_io_time_seconds_total{job='node',instance='%s'}" .Params.instance | query | sortByLabel "device" }} {{ .Labels.device }} Throughput {{ template "prom_query_drilldown" (args (printf "irate(node_disk_read_bytes_total{job='node',instance='%s',device='%s'}[5m]) + irate(node_disk_written_bytes_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device .Labels.instance .Labels.device) "B/s" "humanize") }} {{ end }} Filesystem Fullness {{ define "roughlyNearZero" }} {{ if gt .1 . }}~0{{ else }}{{ printf "%.1f" . }}{{ end }} {{ end }} {{ range printf "node_filesystem_size_bytes{job='node',instance='%s'}" .Params.instance | query | sortByLabel "mountpoint" }} {{ .Labels.mountpoint }} {{ template "prom_query_drilldown" (args (printf "100 - node_filesystem_avail_bytes{job='node',instance='%s',mountpoint='%s'} / node_filesystem_size_bytes{job='node'} * 100" .Labels.instance .Labels.mountpoint) "%" "roughlyNearZero") }} {{ end }} {{ template "prom_right_table_tail" }} {{ template "prom_content_tail" . }} {{ template "tail" }} prometheus-2.15.2+ds/consoles/node.html000066400000000000000000000026441360540074000200430ustar00rootroot00000000000000{{ template "head" . }} {{ template "prom_right_table_head" }} Node {{ template "prom_query_drilldown" (args "sum(up{job='node'})") }} / {{ template "prom_query_drilldown" (args "count(up{job='node'})") }} {{ template "prom_right_table_tail" }} {{ template "prom_content_head" . }}

Node

{{ range query "up{job='node'}" | sortByLabel "instance" }} Yes{{ else }} class="alert-danger">No{{ end }} {{ else }} {{ end }} {{ template "prom_content_tail" . }} {{ template "tail" }} prometheus-2.15.2+ds/consoles/prometheus-overview.html000066400000000000000000000100071360540074000231450ustar00rootroot00000000000000{{ template "head" . }} {{ template "prom_right_table_head" }} {{ range printf "prometheus_http_request_duration_seconds_count{job='prometheus',instance='%s'}" .Params.instance | query | sortByLabel "handler" }} {{ end }} {{ template "prom_right_table_tail" }} {{ template "prom_content_head" . }}

Prometheus Overview - {{ .Params.instance }}

Ingested Samples

HTTP Server

{{ template "prom_content_tail" . }} {{ template "tail" }} prometheus-2.15.2+ds/consoles/prometheus.html000066400000000000000000000024661360540074000213130ustar00rootroot00000000000000{{ template "head" . }} {{ template "prom_right_table_head" }} {{ template "prom_right_table_tail" }} {{ template "prom_content_head" . }}

Prometheus

Node Up CPU
Used
Memory
Available
{{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Labels.instance }} {{ template "prom_query_drilldown" (args (printf "100 * (1 - avg by(instance)(irate(node_cpu_seconds_total{job='node',mode='idle',instance='%s'}[5m])))" .Labels.instance) "%" "printf.1f") }} {{ template "prom_query_drilldown" (args (printf "node_memory_MemFree_bytes{job='node',instance='%s'} + node_memory_Cached_bytes{job='node',instance='%s'} + node_memory_Buffers_bytes{job='node',instance='%s'}" .Labels.instance .Labels.instance .Labels.instance) "B" "humanize1024") }}
No nodes found.
Overview
CPU {{ template "prom_query_drilldown" (args (printf "irate(process_cpu_seconds_total{job='prometheus',instance='%s'}[5m])" .Params.instance) "s/s" "humanizeNoSmallPrefix") }}
Memory {{ template "prom_query_drilldown" (args (printf "process_resident_memory_bytes{job='prometheus',instance='%s'}" .Params.instance) "B" "humanize1024") }}
Version {{ with query (printf "prometheus_build_info{job='prometheus',instance='%s'}" .Params.instance) }}{{. | first | label "version"}}{{end}}
Storage
Ingested Samples {{ template "prom_query_drilldown" (args (printf "irate(prometheus_tsdb_head_samples_appended_total{job='prometheus',instance='%s'}[5m])" .Params.instance) "/s" "humanizeNoSmallPrefix") }}
Head Series {{ template "prom_query_drilldown" (args (printf "prometheus_tsdb_head_series{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }}
Blocks Loaded {{ template "prom_query_drilldown" (args (printf "prometheus_tsdb_blocks_loaded{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }}
Rules
Evaluation Duration {{ template "prom_query_drilldown" (args (printf "irate(prometheus_evaluator_duration_seconds_sum{job='prometheus',instance='%s'}[5m]) / irate(prometheus_evaluator_duration_seconds_count{job='prometheus',instance='%s'}[5m])" .Params.instance .Params.instance) "" "humanizeDuration") }}
Notification Latency {{ template "prom_query_drilldown" (args (printf "irate(prometheus_notifications_latency_seconds_sum{job='prometheus',instance='%s'}[5m]) / irate(prometheus_notifications_latency_seconds_count{job='prometheus',instance='%s'}[5m])" .Params.instance .Params.instance) "" "humanizeDuration") }}
Notification Queue {{ template "prom_query_drilldown" (args (printf "prometheus_notifications_queue_length{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }}
HTTP Server
{{ .Labels.handler }} {{ template "prom_query_drilldown" (args (printf "irate(prometheus_http_request_duration_seconds_count{job='prometheus',instance='%s',handler='%s'}[5m])" .Labels.instance .Labels.handler) "/s" "humanizeNoSmallPrefix") }}
Prometheus {{ template "prom_query_drilldown" (args "sum(up{job='prometheus'})") }} / {{ template "prom_query_drilldown" (args "count(up{job='prometheus'})") }}
{{ range query "up{job='prometheus'}" | sortByLabel "instance" }} {{ else }} {{ end }}
Prometheus Up Ingested Samples Memory
{{ .Labels.instance }} Yes{{ else }} class="alert-danger">No{{ end }} {{ template "prom_query_drilldown" (args (printf "irate(prometheus_tsdb_head_samples_appended_total{job='prometheus',instance='%s'}[5m])" .Labels.instance) "/s" "humanizeNoSmallPrefix") }} {{ template "prom_query_drilldown" (args (printf "process_resident_memory_bytes{job='prometheus',instance='%s'}" .Labels.instance) "B" "humanize1024")}}
No Prometheus instances found.
{{ template "prom_content_tail" . }} {{ template "tail" }} prometheus-2.15.2+ds/discovery/000077500000000000000000000000001360540074000164045ustar00rootroot00000000000000prometheus-2.15.2+ds/discovery/README.md000066400000000000000000000231231360540074000176640ustar00rootroot00000000000000# Service Discovery This directory contains the service discovery (SD) component of Prometheus. ## Design of a Prometheus SD There are many requests to add new SDs to Prometheus, this section looks at what makes a good SD and covers some of the common implementation issues. ### Does this make sense as an SD? The first question to be asked is does it make sense to add this particular SD? An SD mechanism should be reasonably well established, and at a minimum in use across multiple organizations. It should allow discovering of machines and/or services running somewhere. When exactly an SD is popular enough to justify being added to Prometheus natively is an open question. Note: As part of lifting the past moratorium on new SD implementations it was agreed that, in addition to the existing requirements, new service discovery implementations will be required to have a committed maintainer with push access (i.e., on -team). It should not be a brand new SD mechanism, or a variant of an established mechanism. We want to integrate Prometheus with the SD that's already there in your infrastructure, not invent yet more ways to do service discovery. We also do not add mechanisms to work around users lacking service discovery and/or configuration management infrastructure. SDs that merely discover other applications running the same software (e.g. talk to one Kafka or Cassandra server to find the others) are not service discovery. In that case the SD you should be looking at is whatever decides that a machine is going to be a Kafka server, likely a machine database or configuration management system. If something is particularly custom or unusual, `file_sd` is the generic mechanism provided for users to hook in. Generally with Prometheus we offer a single generic mechanism for things with infinite variations, rather than trying to support everything natively (see also, alertmanager webhook, remote read, remote write, node exporter textfile collector). For example anything that would involve talking to a relational database should use `file_sd` instead. For configuration management systems like Chef, while they do have a database/API that'd in principle make sense to talk to for service discovery, the idiomatic approach is to use Chef's templating facilities to write out a file for use with `file_sd`. ### Mapping from SD to Prometheus The general principle with SD is to extract all the potentially useful information we can out of the SD, and let the user choose what they need of it using [relabelling](https://prometheus.io/docs/operating/configuration/#). This information is generally termed metadata. Metadata is exposed as a set of key/value pairs (labels) per target. The keys are prefixed with `__meta__`, and there should also be an `__address__` label with the host:port of the target (preferably an IP address to avoid DNS lookups). No other labelnames should be exposed. It is very common for initial pull requests for new SDs to include hardcoded assumptions that make sense for the author's setup. SD should be generic, any customisation should be handled via relabelling. 
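To make the metadata model described above concrete, here is a minimal sketch of what a single discovered target could look like in code, assuming a hypothetical SD named `example`; every `__meta_example_*` label name below is invented purely for illustration:

```go
package example

import "github.com/prometheus/common/model"

// exampleTarget sketches a single discovered target from a hypothetical SD
// named "example". Only __address__ has special meaning; every
// __meta_example_* label is raw metadata for users to pick from with
// relabel_configs.
func exampleTarget() model.LabelSet {
	return model.LabelSet{
		model.AddressLabel:               "10.0.0.5:9100", // host:port, an IP avoids DNS lookups
		"__meta_example_node":            "demo-node-1",
		"__meta_example_datacenter":      "eu-west-1",
		"__meta_example_tag_environment": "production",
	}
}
```

Everything except `__address__` is plain metadata; any `__meta_*` label the user does not explicitly keep via relabelling is dropped before scraping.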
There should be basically no business logic, filtering, or transformations of the data from the SD beyond that which is needed to fit it into the metadata data model.

Arrays (e.g. a list of tags) should be converted to a single label with the array values joined with a comma. Also prefix and suffix the value with a comma. So for example the array `[a, b, c]` would become `,a,b,c,`. As relabelling regexes are fully anchored, this makes it easier to write correct regexes against (`.*,a,.*` works no matter where `a` appears in the list). The canonical example of this is `__meta_consul_tags`.

Maps, hashes and other forms of key/value pairs should all be prefixed and exposed as labels. For example for EC2 tags, there would be `__meta_ec2_tag_Description=mydescription` for the Description tag. Label names may only contain `[_a-zA-Z0-9]`; sanitize other characters by replacing them with underscores as needed.

For targets with multiple potential ports, you can a) expose them as a list, b) expose them as a map if they're named, or c) expose each of them as its own target. Kubernetes SD takes the target-per-port approach. a) and b) can be combined.

For machine-like SDs (OpenStack, EC2, Kubernetes to some extent) there may be multiple network interfaces for a target. Thus far, reporting the details of only the first/primary network interface has sufficed.

### Other implementation considerations

SDs are intended to dump all possible targets. For example, the optional use of EC2 service discovery would be to take the entire region's worth of EC2 instances it provides and do everything needed in one `scrape_config`. For large deployments where you are only interested in a small proportion of the returned targets, this may cause performance issues. If this occurs, it is acceptable to also offer filtering via whatever mechanisms the SD exposes. For EC2 that would be the `Filter` option on `DescribeInstances`. Keep in mind that this is a performance optimisation; it should be possible to do the same filtering using relabelling alone. As with SD generally, we do not invent new ways to filter targets (that is what relabelling is for), merely offer up whatever functionality the SD itself offers.

It is a general rule with Prometheus that all configuration comes from the configuration file. While the libraries you use to talk to the SD may also offer other mechanisms for providing configuration/authentication under the covers (EC2's use of environment variables being a prime example), using your SD mechanism should not require this. Put another way, your SD implementation should not read environment variables or files to obtain configuration.

Some SD mechanisms have rate limits that make them challenging to use. As an example, we have unfortunately had to reject Amazon ECS service discovery due to the rate limits being so low that it would not be usable for anything beyond small setups.

If a system offers multiple distinct types of SD, select which is in use with a configuration option rather than returning them all from one mega SD that requires relabelling to select just the one you want. So far we have only seen this with Kubernetes. When a single SD with a selector vs. multiple distinct SDs makes sense is an open question.

If there is a failure while talking to the SD, abort rather than returning partial data. It is better to work from stale targets than partial or incorrect metadata.

The information obtained from service discovery is not considered sensitive security-wise.
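To illustrate the "abort rather than return partial data" guidance above, here is a minimal, hypothetical sketch of a refresh function; the `exampleClient` interface and both of its methods are invented for this example and do not correspond to any real Prometheus SD or client library:

```go
package example

import (
	"context"

	"github.com/prometheus/prometheus/discovery/targetgroup"
)

// exampleClient stands in for whatever client library the SD would use;
// both methods are invented for this sketch.
type exampleClient interface {
	ListServiceNames(ctx context.Context) ([]string, error)
	Service(ctx context.Context, name string) (*targetgroup.Group, error)
}

// refresh returns either the complete set of target groups or an error.
// If any call fails it aborts outright instead of returning whatever was
// gathered so far: letting the caller keep its stale targets beats handing
// it incomplete or incorrect metadata.
func refresh(ctx context.Context, c exampleClient) ([]*targetgroup.Group, error) {
	names, err := c.ListServiceNames(ctx)
	if err != nil {
		return nil, err
	}
	var all []*targetgroup.Group
	for _, name := range names {
		tg, err := c.Service(ctx, name)
		if err != nil {
			// Abort; do not return the partially built result.
			return nil, err
		}
		all = append(all, tg)
	}
	return all, nil
}
```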
Do not return secrets in metadata, anyone with access to the Prometheus server will be able to see them. ## Writing an SD mechanism ### The SD interface A Service Discovery (SD) mechanism has to discover targets and provide them to Prometheus. We expect similar targets to be grouped together, in the form of a [target group](https://godoc.org/github.com/prometheus/prometheus/discovery/targetgroup#Group). The SD mechanism sends the targets down to prometheus as list of target groups. An SD mechanism has to implement the `Discoverer` Interface: ```go type Discoverer interface { Run(ctx context.Context, up chan<- []*targetgroup.Group) } ``` Prometheus will call the `Run()` method on a provider to initialize the discovery mechanism. The mechanism will then send *all* the target groups into the channel. Now the mechanism will watch for changes. For each update it can send all target groups, or only changed and new target groups, down the channel. `Manager` will handle both cases. For example if we had a discovery mechanism and it retrieves the following groups: ``` []targetgroup.Group{ { Targets: []model.LabelSet{ { "__instance__": "10.11.150.1:7870", "hostname": "demo-target-1", "test": "simple-test", }, { "__instance__": "10.11.150.4:7870", "hostname": "demo-target-2", "test": "simple-test", }, }, Labels: map[LabelName][LabelValue] { "job": "mysql", }, "Source": "file1", }, { Targets: []model.LabelSet{ { "__instance__": "10.11.122.11:6001", "hostname": "demo-postgres-1", "test": "simple-test", }, { "__instance__": "10.11.122.15:6001", "hostname": "demo-postgres-2", "test": "simple-test", }, }, Labels: map[LabelName][LabelValue] { "job": "postgres", }, "Source": "file2", }, } ``` Here there are two target groups one group with source `file1` and another with `file2`. The grouping is implementation specific and could even be one target per group. But, one has to make sure every target group sent by an SD instance should have a `Source` which is unique across all the target groups of that SD instance. In this case, both the target groups are sent down the channel the first time `Run()` is called. Now, for an update, we need to send the whole _changed_ target group down the channel. i.e, if the target with `hostname: demo-postgres-2` goes away, we send: ``` &targetgroup.Group{ Targets: []model.LabelSet{ { "__instance__": "10.11.122.11:6001", "hostname": "demo-postgres-1", "test": "simple-test", }, }, Labels: map[LabelName][LabelValue] { "job": "postgres", }, "Source": "file2", } ``` down the channel. If all the targets in a group go away, we need to send the target groups with empty `Targets` down the channel. i.e, if all targets with `job: postgres` go away, we send: ``` &targetgroup.Group{ Targets: nil, "Source": "file2", } ``` down the channel. prometheus-2.15.2+ds/discovery/azure/000077500000000000000000000000001360540074000175325ustar00rootroot00000000000000prometheus-2.15.2+ds/discovery/azure/azure.go000066400000000000000000000405061360540074000212140ustar00rootroot00000000000000// Copyright 2015 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. package azure import ( "context" "fmt" "net" "net/http" "strings" "sync" "time" "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute" "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-10-01/network" "github.com/Azure/go-autorest/autorest" "github.com/Azure/go-autorest/autorest/adal" "github.com/Azure/go-autorest/autorest/azure" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" "github.com/pkg/errors" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/refresh" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/util/strutil" ) const ( azureLabel = model.MetaLabelPrefix + "azure_" azureLabelSubscriptionID = azureLabel + "subscription_id" azureLabelTenantID = azureLabel + "tenant_id" azureLabelMachineID = azureLabel + "machine_id" azureLabelMachineResourceGroup = azureLabel + "machine_resource_group" azureLabelMachineName = azureLabel + "machine_name" azureLabelMachineOSType = azureLabel + "machine_os_type" azureLabelMachineLocation = azureLabel + "machine_location" azureLabelMachinePrivateIP = azureLabel + "machine_private_ip" azureLabelMachinePublicIP = azureLabel + "machine_public_ip" azureLabelMachineTag = azureLabel + "machine_tag_" azureLabelMachineScaleSet = azureLabel + "machine_scale_set" authMethodOAuth = "OAuth" authMethodManagedIdentity = "ManagedIdentity" ) // DefaultSDConfig is the default Azure SD configuration. var DefaultSDConfig = SDConfig{ Port: 80, RefreshInterval: model.Duration(5 * time.Minute), Environment: azure.PublicCloud.Name, AuthenticationMethod: authMethodOAuth, } // SDConfig is the configuration for Azure based service discovery. type SDConfig struct { Environment string `yaml:"environment,omitempty"` Port int `yaml:"port"` SubscriptionID string `yaml:"subscription_id"` TenantID string `yaml:"tenant_id,omitempty"` ClientID string `yaml:"client_id,omitempty"` ClientSecret config_util.Secret `yaml:"client_secret,omitempty"` RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` AuthenticationMethod string `yaml:"authentication_method,omitempty"` } func validateAuthParam(param, name string) error { if len(param) == 0 { return errors.Errorf("azure SD configuration requires a %s", name) } return nil } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = DefaultSDConfig type plain SDConfig err := unmarshal((*plain)(c)) if err != nil { return err } if err = validateAuthParam(c.SubscriptionID, "subscription_id"); err != nil { return err } if c.AuthenticationMethod == authMethodOAuth { if err = validateAuthParam(c.TenantID, "tenant_id"); err != nil { return err } if err = validateAuthParam(c.ClientID, "client_id"); err != nil { return err } if err = validateAuthParam(string(c.ClientSecret), "client_secret"); err != nil { return err } } if c.AuthenticationMethod != authMethodOAuth && c.AuthenticationMethod != authMethodManagedIdentity { return errors.Errorf("unknown authentication_type %q. Supported types are %q or %q", c.AuthenticationMethod, authMethodOAuth, authMethodManagedIdentity) } return nil } type Discovery struct { *refresh.Discovery logger log.Logger cfg *SDConfig port int } // NewDiscovery returns a new AzureDiscovery which periodically refreshes its targets. 
func NewDiscovery(cfg *SDConfig, logger log.Logger) *Discovery { if logger == nil { logger = log.NewNopLogger() } d := &Discovery{ cfg: cfg, port: cfg.Port, logger: logger, } d.Discovery = refresh.NewDiscovery( logger, "azure", time.Duration(cfg.RefreshInterval), d.refresh, ) return d } // azureClient represents multiple Azure Resource Manager providers. type azureClient struct { nic network.InterfacesClient vm compute.VirtualMachinesClient vmss compute.VirtualMachineScaleSetsClient vmssvm compute.VirtualMachineScaleSetVMsClient } // createAzureClient is a helper function for creating an Azure compute client to ARM. func createAzureClient(cfg SDConfig) (azureClient, error) { env, err := azure.EnvironmentFromName(cfg.Environment) if err != nil { return azureClient{}, err } activeDirectoryEndpoint := env.ActiveDirectoryEndpoint resourceManagerEndpoint := env.ResourceManagerEndpoint var c azureClient var spt *adal.ServicePrincipalToken switch cfg.AuthenticationMethod { case authMethodManagedIdentity: msiEndpoint, err := adal.GetMSIVMEndpoint() if err != nil { return azureClient{}, err } spt, err = adal.NewServicePrincipalTokenFromMSI(msiEndpoint, resourceManagerEndpoint) if err != nil { return azureClient{}, err } case authMethodOAuth: oauthConfig, err := adal.NewOAuthConfig(activeDirectoryEndpoint, cfg.TenantID) if err != nil { return azureClient{}, err } spt, err = adal.NewServicePrincipalToken(*oauthConfig, cfg.ClientID, string(cfg.ClientSecret), resourceManagerEndpoint) if err != nil { return azureClient{}, err } } bearerAuthorizer := autorest.NewBearerAuthorizer(spt) c.vm = compute.NewVirtualMachinesClientWithBaseURI(resourceManagerEndpoint, cfg.SubscriptionID) c.vm.Authorizer = bearerAuthorizer c.nic = network.NewInterfacesClientWithBaseURI(resourceManagerEndpoint, cfg.SubscriptionID) c.nic.Authorizer = bearerAuthorizer c.vmss = compute.NewVirtualMachineScaleSetsClientWithBaseURI(resourceManagerEndpoint, cfg.SubscriptionID) c.vmss.Authorizer = bearerAuthorizer c.vmssvm = compute.NewVirtualMachineScaleSetVMsClientWithBaseURI(resourceManagerEndpoint, cfg.SubscriptionID) c.vmssvm.Authorizer = bearerAuthorizer return c, nil } // azureResource represents a resource identifier in Azure. type azureResource struct { Name string ResourceGroup string } // virtualMachine represents an Azure virtual machine (which can also be created by a VMSS) type virtualMachine struct { ID string Name string Type string Location string OsType string ScaleSet string Tags map[string]*string NetworkInterfaces []string } // Create a new azureResource object from an ID string. func newAzureResourceFromID(id string, logger log.Logger) (azureResource, error) { // Resource IDs have the following format. // /subscriptions/SUBSCRIPTION_ID/resourceGroups/RESOURCE_GROUP/providers/PROVIDER/TYPE/NAME // or if embedded resource then // /subscriptions/SUBSCRIPTION_ID/resourceGroups/RESOURCE_GROUP/providers/PROVIDER/TYPE/NAME/TYPE/NAME s := strings.Split(id, "/") if len(s) != 9 && len(s) != 11 { err := errors.Errorf("invalid ID '%s'. 
Refusing to create azureResource", id) level.Error(logger).Log("err", err) return azureResource{}, err } return azureResource{ Name: strings.ToLower(s[8]), ResourceGroup: strings.ToLower(s[4]), }, nil } func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { defer level.Debug(d.logger).Log("msg", "Azure discovery completed") client, err := createAzureClient(*d.cfg) if err != nil { return nil, errors.Wrap(err, "could not create Azure client") } machines, err := client.getVMs(ctx) if err != nil { return nil, errors.Wrap(err, "could not get virtual machines") } level.Debug(d.logger).Log("msg", "Found virtual machines during Azure discovery.", "count", len(machines)) // Load the vms managed by scale sets. scaleSets, err := client.getScaleSets(ctx) if err != nil { return nil, errors.Wrap(err, "could not get virtual machine scale sets") } for _, scaleSet := range scaleSets { scaleSetVms, err := client.getScaleSetVMs(ctx, scaleSet) if err != nil { return nil, errors.Wrap(err, "could not get virtual machine scale set vms") } machines = append(machines, scaleSetVms...) } // We have the slice of machines. Now turn them into targets. // Doing them in go routines because the network interface calls are slow. type target struct { labelSet model.LabelSet err error } var wg sync.WaitGroup wg.Add(len(machines)) ch := make(chan target, len(machines)) for i, vm := range machines { go func(i int, vm virtualMachine) { defer wg.Done() r, err := newAzureResourceFromID(vm.ID, d.logger) if err != nil { ch <- target{labelSet: nil, err: err} return } labels := model.LabelSet{ azureLabelSubscriptionID: model.LabelValue(d.cfg.SubscriptionID), azureLabelTenantID: model.LabelValue(d.cfg.TenantID), azureLabelMachineID: model.LabelValue(vm.ID), azureLabelMachineName: model.LabelValue(vm.Name), azureLabelMachineOSType: model.LabelValue(vm.OsType), azureLabelMachineLocation: model.LabelValue(vm.Location), azureLabelMachineResourceGroup: model.LabelValue(r.ResourceGroup), } if vm.ScaleSet != "" { labels[azureLabelMachineScaleSet] = model.LabelValue(vm.ScaleSet) } if vm.Tags != nil { for k, v := range vm.Tags { name := strutil.SanitizeLabelName(k) labels[azureLabelMachineTag+model.LabelName(name)] = model.LabelValue(*v) } } // Get the IP address information via separate call to the network provider. for _, nicID := range vm.NetworkInterfaces { networkInterface, err := client.getNetworkInterfaceByID(ctx, nicID) if err != nil { level.Error(d.logger).Log("msg", "Unable to get network interface", "name", nicID, "err", err) ch <- target{labelSet: nil, err: err} // Get out of this routine because we cannot continue without a network interface. return } if networkInterface.InterfacePropertiesFormat == nil { continue } // Unfortunately Azure does not return information on whether a VM is deallocated. // This information is available via another API call however the Go SDK does not // yet support this. On deallocated machines, this value happens to be nil so it // is a cheap and easy way to determine if a machine is allocated or not. 
if networkInterface.Primary == nil { level.Debug(d.logger).Log("msg", "Skipping deallocated virtual machine", "machine", vm.Name) return } if *networkInterface.Primary { for _, ip := range *networkInterface.IPConfigurations { if ip.PublicIPAddress != nil && ip.PublicIPAddress.PublicIPAddressPropertiesFormat != nil { labels[azureLabelMachinePublicIP] = model.LabelValue(*ip.PublicIPAddress.IPAddress) } if ip.PrivateIPAddress != nil { labels[azureLabelMachinePrivateIP] = model.LabelValue(*ip.PrivateIPAddress) address := net.JoinHostPort(*ip.PrivateIPAddress, fmt.Sprintf("%d", d.port)) labels[model.AddressLabel] = model.LabelValue(address) ch <- target{labelSet: labels, err: nil} return } // If we made it here, we don't have a private IP which should be impossible. // Return an empty target and error to ensure an all or nothing situation. err = errors.Errorf("unable to find a private IP for VM %s", vm.Name) ch <- target{labelSet: nil, err: err} return } } } }(i, vm) } wg.Wait() close(ch) var tg targetgroup.Group for tgt := range ch { if tgt.err != nil { return nil, errors.Wrap(err, "unable to complete Azure service discovery") } if tgt.labelSet != nil { tg.Targets = append(tg.Targets, tgt.labelSet) } } return []*targetgroup.Group{&tg}, nil } func (client *azureClient) getVMs(ctx context.Context) ([]virtualMachine, error) { var vms []virtualMachine result, err := client.vm.ListAll(ctx) if err != nil { return nil, errors.Wrap(err, "could not list virtual machines") } for result.NotDone() { for _, vm := range result.Values() { vms = append(vms, mapFromVM(vm)) } err = result.NextWithContext(ctx) if err != nil { return nil, errors.Wrap(err, "could not list virtual machines") } } return vms, nil } func (client *azureClient) getScaleSets(ctx context.Context) ([]compute.VirtualMachineScaleSet, error) { var scaleSets []compute.VirtualMachineScaleSet result, err := client.vmss.ListAll(ctx) if err != nil { return nil, errors.Wrap(err, "could not list virtual machine scale sets") } for result.NotDone() { scaleSets = append(scaleSets, result.Values()...) err = result.NextWithContext(ctx) if err != nil { return nil, errors.Wrap(err, "could not list virtual machine scale sets") } } return scaleSets, nil } func (client *azureClient) getScaleSetVMs(ctx context.Context, scaleSet compute.VirtualMachineScaleSet) ([]virtualMachine, error) { var vms []virtualMachine //TODO do we really need to fetch the resourcegroup this way? 
r, err := newAzureResourceFromID(*scaleSet.ID, nil) if err != nil { return nil, errors.Wrap(err, "could not parse scale set ID") } result, err := client.vmssvm.List(ctx, r.ResourceGroup, *(scaleSet.Name), "", "", "") if err != nil { return nil, errors.Wrap(err, "could not list virtual machine scale set vms") } for result.NotDone() { for _, vm := range result.Values() { vms = append(vms, mapFromVMScaleSetVM(vm, *scaleSet.Name)) } err = result.NextWithContext(ctx) if err != nil { return nil, errors.Wrap(err, "could not list virtual machine scale set vms") } } return vms, nil } func mapFromVM(vm compute.VirtualMachine) virtualMachine { osType := string(vm.StorageProfile.OsDisk.OsType) tags := map[string]*string{} networkInterfaces := []string{} if vm.Tags != nil { tags = vm.Tags } if vm.NetworkProfile != nil { for _, vmNIC := range *(vm.NetworkProfile.NetworkInterfaces) { networkInterfaces = append(networkInterfaces, *vmNIC.ID) } } return virtualMachine{ ID: *(vm.ID), Name: *(vm.Name), Type: *(vm.Type), Location: *(vm.Location), OsType: osType, ScaleSet: "", Tags: tags, NetworkInterfaces: networkInterfaces, } } func mapFromVMScaleSetVM(vm compute.VirtualMachineScaleSetVM, scaleSetName string) virtualMachine { osType := string(vm.StorageProfile.OsDisk.OsType) tags := map[string]*string{} networkInterfaces := []string{} if vm.Tags != nil { tags = vm.Tags } if vm.NetworkProfile != nil { for _, vmNIC := range *(vm.NetworkProfile.NetworkInterfaces) { networkInterfaces = append(networkInterfaces, *vmNIC.ID) } } return virtualMachine{ ID: *(vm.ID), Name: *(vm.Name), Type: *(vm.Type), Location: *(vm.Location), OsType: osType, ScaleSet: scaleSetName, Tags: tags, NetworkInterfaces: networkInterfaces, } } func (client *azureClient) getNetworkInterfaceByID(ctx context.Context, networkInterfaceID string) (*network.Interface, error) { result := network.Interface{} queryParameters := map[string]interface{}{ "api-version": "2018-10-01", } preparer := autorest.CreatePreparer( autorest.AsGet(), autorest.WithBaseURL(client.nic.BaseURI), autorest.WithPath(networkInterfaceID), autorest.WithQueryParameters(queryParameters)) req, err := preparer.Prepare((&http.Request{}).WithContext(ctx)) if err != nil { return nil, autorest.NewErrorWithError(err, "network.InterfacesClient", "Get", nil, "Failure preparing request") } resp, err := client.nic.GetSender(req) if err != nil { return nil, autorest.NewErrorWithError(err, "network.InterfacesClient", "Get", resp, "Failure sending request") } result, err = client.nic.GetResponder(resp) if err != nil { return nil, autorest.NewErrorWithError(err, "network.InterfacesClient", "Get", resp, "Failure responding to request") } return &result, nil } prometheus-2.15.2+ds/discovery/azure/azure_test.go000066400000000000000000000124421360540074000222510ustar00rootroot00000000000000// Copyright 2015 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package azure import ( "reflect" "testing" "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute" ) func TestMapFromVMWithEmptyTags(t *testing.T) { id := "test" name := "name" vmType := "type" location := "westeurope" networkProfile := compute.NetworkProfile{ NetworkInterfaces: &[]compute.NetworkInterfaceReference{}, } properties := &compute.VirtualMachineProperties{ StorageProfile: &compute.StorageProfile{ OsDisk: &compute.OSDisk{ OsType: "Linux", }, }, NetworkProfile: &networkProfile, } testVM := compute.VirtualMachine{ ID: &id, Name: &name, Type: &vmType, Location: &location, Tags: nil, VirtualMachineProperties: properties, } expectedVM := virtualMachine{ ID: id, Name: name, Type: vmType, Location: location, OsType: "Linux", Tags: map[string]*string{}, NetworkInterfaces: []string{}, } actualVM := mapFromVM(testVM) if !reflect.DeepEqual(expectedVM, actualVM) { t.Errorf("Expected %v got %v", expectedVM, actualVM) } } func TestMapFromVMWithTags(t *testing.T) { id := "test" name := "name" vmType := "type" location := "westeurope" tags := map[string]*string{ "prometheus": new(string), } networkProfile := compute.NetworkProfile{ NetworkInterfaces: &[]compute.NetworkInterfaceReference{}, } properties := &compute.VirtualMachineProperties{ StorageProfile: &compute.StorageProfile{ OsDisk: &compute.OSDisk{ OsType: "Linux", }, }, NetworkProfile: &networkProfile, } testVM := compute.VirtualMachine{ ID: &id, Name: &name, Type: &vmType, Location: &location, Tags: tags, VirtualMachineProperties: properties, } expectedVM := virtualMachine{ ID: id, Name: name, Type: vmType, Location: location, OsType: "Linux", Tags: tags, NetworkInterfaces: []string{}, } actualVM := mapFromVM(testVM) if !reflect.DeepEqual(expectedVM, actualVM) { t.Errorf("Expected %v got %v", expectedVM, actualVM) } } func TestMapFromVMScaleSetVMWithEmptyTags(t *testing.T) { id := "test" name := "name" vmType := "type" location := "westeurope" networkProfile := compute.NetworkProfile{ NetworkInterfaces: &[]compute.NetworkInterfaceReference{}, } properties := &compute.VirtualMachineScaleSetVMProperties{ StorageProfile: &compute.StorageProfile{ OsDisk: &compute.OSDisk{ OsType: "Linux", }, }, NetworkProfile: &networkProfile, } testVM := compute.VirtualMachineScaleSetVM{ ID: &id, Name: &name, Type: &vmType, Location: &location, Tags: nil, VirtualMachineScaleSetVMProperties: properties, } scaleSet := "testSet" expectedVM := virtualMachine{ ID: id, Name: name, Type: vmType, Location: location, OsType: "Linux", Tags: map[string]*string{}, NetworkInterfaces: []string{}, ScaleSet: scaleSet, } actualVM := mapFromVMScaleSetVM(testVM, scaleSet) if !reflect.DeepEqual(expectedVM, actualVM) { t.Errorf("Expected %v got %v", expectedVM, actualVM) } } func TestMapFromVMScaleSetVMWithTags(t *testing.T) { id := "test" name := "name" vmType := "type" location := "westeurope" tags := map[string]*string{ "prometheus": new(string), } networkProfile := compute.NetworkProfile{ NetworkInterfaces: &[]compute.NetworkInterfaceReference{}, } properties := &compute.VirtualMachineScaleSetVMProperties{ StorageProfile: &compute.StorageProfile{ OsDisk: &compute.OSDisk{ OsType: "Linux", }, }, NetworkProfile: &networkProfile, } testVM := compute.VirtualMachineScaleSetVM{ ID: &id, Name: &name, Type: &vmType, Location: &location, Tags: tags, VirtualMachineScaleSetVMProperties: properties, } scaleSet := "testSet" expectedVM := virtualMachine{ ID: id, Name: name, Type: vmType, Location: location, OsType: "Linux", Tags: tags, NetworkInterfaces: []string{}, 
ScaleSet: scaleSet, } actualVM := mapFromVMScaleSetVM(testVM, scaleSet) if !reflect.DeepEqual(expectedVM, actualVM) { t.Errorf("Expected %v got %v", expectedVM, actualVM) } } prometheus-2.15.2+ds/discovery/config/000077500000000000000000000000001360540074000176515ustar00rootroot00000000000000prometheus-2.15.2+ds/discovery/config/config.go000066400000000000000000000113061360540074000214460ustar00rootroot00000000000000// Copyright 2016 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package config import ( "github.com/pkg/errors" "github.com/prometheus/prometheus/discovery/azure" "github.com/prometheus/prometheus/discovery/consul" "github.com/prometheus/prometheus/discovery/dns" "github.com/prometheus/prometheus/discovery/ec2" "github.com/prometheus/prometheus/discovery/file" "github.com/prometheus/prometheus/discovery/gce" "github.com/prometheus/prometheus/discovery/kubernetes" "github.com/prometheus/prometheus/discovery/marathon" "github.com/prometheus/prometheus/discovery/openstack" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/triton" "github.com/prometheus/prometheus/discovery/zookeeper" ) // ServiceDiscoveryConfig configures lists of different service discovery mechanisms. type ServiceDiscoveryConfig struct { // List of labeled target groups for this job. StaticConfigs []*targetgroup.Group `yaml:"static_configs,omitempty"` // List of DNS service discovery configurations. DNSSDConfigs []*dns.SDConfig `yaml:"dns_sd_configs,omitempty"` // List of file service discovery configurations. FileSDConfigs []*file.SDConfig `yaml:"file_sd_configs,omitempty"` // List of Consul service discovery configurations. ConsulSDConfigs []*consul.SDConfig `yaml:"consul_sd_configs,omitempty"` // List of Serverset service discovery configurations. ServersetSDConfigs []*zookeeper.ServersetSDConfig `yaml:"serverset_sd_configs,omitempty"` // NerveSDConfigs is a list of Nerve service discovery configurations. NerveSDConfigs []*zookeeper.NerveSDConfig `yaml:"nerve_sd_configs,omitempty"` // MarathonSDConfigs is a list of Marathon service discovery configurations. MarathonSDConfigs []*marathon.SDConfig `yaml:"marathon_sd_configs,omitempty"` // List of Kubernetes service discovery configurations. KubernetesSDConfigs []*kubernetes.SDConfig `yaml:"kubernetes_sd_configs,omitempty"` // List of GCE service discovery configurations. GCESDConfigs []*gce.SDConfig `yaml:"gce_sd_configs,omitempty"` // List of EC2 service discovery configurations. EC2SDConfigs []*ec2.SDConfig `yaml:"ec2_sd_configs,omitempty"` // List of OpenStack service discovery configurations. OpenstackSDConfigs []*openstack.SDConfig `yaml:"openstack_sd_configs,omitempty"` // List of Azure service discovery configurations. AzureSDConfigs []*azure.SDConfig `yaml:"azure_sd_configs,omitempty"` // List of Triton service discovery configurations. TritonSDConfigs []*triton.SDConfig `yaml:"triton_sd_configs,omitempty"` } // Validate validates the ServiceDiscoveryConfig. 
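// It returns an error if any of the per-mechanism lists contains an empty or
// null element, which usually indicates a malformed entry in the YAML
// configuration.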
func (c *ServiceDiscoveryConfig) Validate() error { for _, cfg := range c.AzureSDConfigs { if cfg == nil { return errors.New("empty or null section in azure_sd_configs") } } for _, cfg := range c.ConsulSDConfigs { if cfg == nil { return errors.New("empty or null section in consul_sd_configs") } } for _, cfg := range c.DNSSDConfigs { if cfg == nil { return errors.New("empty or null section in dns_sd_configs") } } for _, cfg := range c.EC2SDConfigs { if cfg == nil { return errors.New("empty or null section in ec2_sd_configs") } } for _, cfg := range c.FileSDConfigs { if cfg == nil { return errors.New("empty or null section in file_sd_configs") } } for _, cfg := range c.GCESDConfigs { if cfg == nil { return errors.New("empty or null section in gce_sd_configs") } } for _, cfg := range c.KubernetesSDConfigs { if cfg == nil { return errors.New("empty or null section in kubernetes_sd_configs") } } for _, cfg := range c.MarathonSDConfigs { if cfg == nil { return errors.New("empty or null section in marathon_sd_configs") } } for _, cfg := range c.NerveSDConfigs { if cfg == nil { return errors.New("empty or null section in nerve_sd_configs") } } for _, cfg := range c.OpenstackSDConfigs { if cfg == nil { return errors.New("empty or null section in openstack_sd_configs") } } for _, cfg := range c.ServersetSDConfigs { if cfg == nil { return errors.New("empty or null section in serverset_sd_configs") } } for _, cfg := range c.StaticConfigs { if cfg == nil { return errors.New("empty or null section in static_configs") } } return nil } prometheus-2.15.2+ds/discovery/consul/000077500000000000000000000000001360540074000177075ustar00rootroot00000000000000prometheus-2.15.2+ds/discovery/consul/consul.go000066400000000000000000000416311360540074000215460ustar00rootroot00000000000000// Copyright 2015 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package consul import ( "context" "fmt" "net" "net/http" "strconv" "strings" "time" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" consul "github.com/hashicorp/consul/api" conntrack "github.com/mwitkow/go-conntrack" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/util/strutil" ) const ( watchTimeout = 30 * time.Second retryInterval = 15 * time.Second // addressLabel is the name for the label containing a target's address. addressLabel = model.MetaLabelPrefix + "consul_address" // nodeLabel is the name for the label containing a target's node name. nodeLabel = model.MetaLabelPrefix + "consul_node" // metaDataLabel is the prefix for the labels mapping to a target's metadata. metaDataLabel = model.MetaLabelPrefix + "consul_metadata_" // serviceMetaDataLabel is the prefix for the labels mapping to a target's service metadata. 
serviceMetaDataLabel = model.MetaLabelPrefix + "consul_service_metadata_" // tagsLabel is the name of the label containing the tags assigned to the target. tagsLabel = model.MetaLabelPrefix + "consul_tags" // serviceLabel is the name of the label containing the service name. serviceLabel = model.MetaLabelPrefix + "consul_service" // serviceAddressLabel is the name of the label containing the (optional) service address. serviceAddressLabel = model.MetaLabelPrefix + "consul_service_address" //servicePortLabel is the name of the label containing the service port. servicePortLabel = model.MetaLabelPrefix + "consul_service_port" // datacenterLabel is the name of the label containing the datacenter ID. datacenterLabel = model.MetaLabelPrefix + "consul_dc" // taggedAddressesLabel is the prefix for the labels mapping to a target's tagged addresses. taggedAddressesLabel = model.MetaLabelPrefix + "consul_tagged_address_" // serviceIDLabel is the name of the label containing the service ID. serviceIDLabel = model.MetaLabelPrefix + "consul_service_id" // Constants for instrumentation. namespace = "prometheus" ) var ( rpcFailuresCount = prometheus.NewCounter( prometheus.CounterOpts{ Namespace: namespace, Name: "sd_consul_rpc_failures_total", Help: "The number of Consul RPC call failures.", }) rpcDuration = prometheus.NewSummaryVec( prometheus.SummaryOpts{ Namespace: namespace, Name: "sd_consul_rpc_duration_seconds", Help: "The duration of a Consul RPC call in seconds.", Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, }, []string{"endpoint", "call"}, ) // DefaultSDConfig is the default Consul SD configuration. DefaultSDConfig = SDConfig{ TagSeparator: ",", Scheme: "http", Server: "localhost:8500", AllowStale: true, RefreshInterval: model.Duration(watchTimeout), } ) // SDConfig is the configuration for Consul service discovery. type SDConfig struct { Server string `yaml:"server,omitempty"` Token config_util.Secret `yaml:"token,omitempty"` Datacenter string `yaml:"datacenter,omitempty"` TagSeparator string `yaml:"tag_separator,omitempty"` Scheme string `yaml:"scheme,omitempty"` Username string `yaml:"username,omitempty"` Password config_util.Secret `yaml:"password,omitempty"` // See https://www.consul.io/docs/internals/consensus.html#consistency-modes, // stale reads are a lot cheaper and are a necessity if you have >5k targets. AllowStale bool `yaml:"allow_stale"` // By default use blocking queries (https://www.consul.io/api/index.html#blocking-queries) // but allow users to throttle updates if necessary. This can be useful because of "bugs" like // https://github.com/hashicorp/consul/issues/3712 which cause an un-necessary // amount of requests on consul. RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` // See https://www.consul.io/api/catalog.html#list-services // The list of services for which targets are discovered. // Defaults to all services if empty. Services []string `yaml:"services,omitempty"` // A list of tags used to filter instances inside a service. Services must contain all tags in the list. ServiceTags []string `yaml:"tags,omitempty"` // Desired node metadata. NodeMeta map[string]string `yaml:"node_meta,omitempty"` TLSConfig config_util.TLSConfig `yaml:"tls_config,omitempty"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. 
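// It starts from DefaultSDConfig and returns an error if no Consul server
// address is configured.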
func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = DefaultSDConfig type plain SDConfig err := unmarshal((*plain)(c)) if err != nil { return err } if strings.TrimSpace(c.Server) == "" { return errors.New("consul SD configuration requires a server address") } return nil } func init() { prometheus.MustRegister(rpcFailuresCount) prometheus.MustRegister(rpcDuration) // Initialize metric vectors. rpcDuration.WithLabelValues("catalog", "service") rpcDuration.WithLabelValues("catalog", "services") } // Discovery retrieves target information from a Consul server // and updates them via watches. type Discovery struct { client *consul.Client clientDatacenter string tagSeparator string watchedServices []string // Set of services which will be discovered. watchedTags []string // Tags used to filter instances of a service. watchedNodeMeta map[string]string allowStale bool refreshInterval time.Duration finalizer func() logger log.Logger } // NewDiscovery returns a new Discovery for the given config. func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { if logger == nil { logger = log.NewNopLogger() } tls, err := config_util.NewTLSConfig(&conf.TLSConfig) if err != nil { return nil, err } transport := &http.Transport{ IdleConnTimeout: 5 * time.Duration(conf.RefreshInterval), TLSClientConfig: tls, DialContext: conntrack.NewDialContextFunc( conntrack.DialWithTracing(), conntrack.DialWithName("consul_sd"), ), } wrapper := &http.Client{ Transport: transport, Timeout: 35 * time.Second, } clientConf := &consul.Config{ Address: conf.Server, Scheme: conf.Scheme, Datacenter: conf.Datacenter, Token: string(conf.Token), HttpAuth: &consul.HttpBasicAuth{ Username: conf.Username, Password: string(conf.Password), }, HttpClient: wrapper, } client, err := consul.NewClient(clientConf) if err != nil { return nil, err } cd := &Discovery{ client: client, tagSeparator: conf.TagSeparator, watchedServices: conf.Services, watchedTags: conf.ServiceTags, watchedNodeMeta: conf.NodeMeta, allowStale: conf.AllowStale, refreshInterval: time.Duration(conf.RefreshInterval), clientDatacenter: conf.Datacenter, finalizer: transport.CloseIdleConnections, logger: logger, } return cd, nil } // shouldWatch returns whether the service of the given name should be watched. func (d *Discovery) shouldWatch(name string, tags []string) bool { return d.shouldWatchFromName(name) && d.shouldWatchFromTags(tags) } // shouldWatch returns whether the service of the given name should be watched based on its name. func (d *Discovery) shouldWatchFromName(name string) bool { // If there's no fixed set of watched services, we watch everything. if len(d.watchedServices) == 0 { return true } for _, sn := range d.watchedServices { if sn == name { return true } } return false } // shouldWatch returns whether the service of the given name should be watched based on its tags. // This gets called when the user doesn't specify a list of services in order to avoid watching // *all* services. Details in https://github.com/prometheus/prometheus/pull/3814 func (d *Discovery) shouldWatchFromTags(tags []string) bool { // If there's no fixed set of watched tags, we watch everything. if len(d.watchedTags) == 0 { return true } tagOuter: for _, wtag := range d.watchedTags { for _, tag := range tags { if wtag == tag { continue tagOuter } } return false } return true } // Get the local datacenter if not specified. 
func (d *Discovery) getDatacenter() error { // If the datacenter was not set from clientConf, let's get it from the local Consul agent // (Consul default is to use local node's datacenter if one isn't given for a query). if d.clientDatacenter != "" { return nil } info, err := d.client.Agent().Self() if err != nil { level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err) rpcFailuresCount.Inc() return err } dc, ok := info["Config"]["Datacenter"].(string) if !ok { err := errors.Errorf("invalid value '%v' for Config.Datacenter", info["Config"]["Datacenter"]) level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err) return err } d.clientDatacenter = dc return nil } // Initialize the Discoverer run. func (d *Discovery) initialize(ctx context.Context) { // Loop until we manage to get the local datacenter. for { // We have to check the context at least once. The checks during channel sends // do not guarantee that. select { case <-ctx.Done(): return default: } // Get the local datacenter first, if necessary. err := d.getDatacenter() if err != nil { time.Sleep(retryInterval) continue } // We are good to go. return } } // Run implements the Discoverer interface. func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { if d.finalizer != nil { defer d.finalizer() } d.initialize(ctx) if len(d.watchedServices) == 0 || len(d.watchedTags) != 0 { // We need to watch the catalog. ticker := time.NewTicker(d.refreshInterval) // Watched services and their cancellation functions. services := make(map[string]func()) var lastIndex uint64 for { select { case <-ctx.Done(): ticker.Stop() return default: d.watchServices(ctx, ch, &lastIndex, services) <-ticker.C } } } else { // We only have fully defined services. for _, name := range d.watchedServices { d.watchService(ctx, ch, name) } <-ctx.Done() } } // Watch the catalog for new services we would like to watch. This is called only // when we don't know yet the names of the services and need to ask Consul the // entire list of services. func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.Group, lastIndex *uint64, services map[string]func()) { catalog := d.client.Catalog() level.Debug(d.logger).Log("msg", "Watching services", "tags", d.watchedTags) t0 := time.Now() opts := &consul.QueryOptions{ WaitIndex: *lastIndex, WaitTime: watchTimeout, AllowStale: d.allowStale, NodeMeta: d.watchedNodeMeta, } srvs, meta, err := catalog.Services(opts.WithContext(ctx)) elapsed := time.Since(t0) rpcDuration.WithLabelValues("catalog", "services").Observe(elapsed.Seconds()) // Check the context before in order to exit early. select { case <-ctx.Done(): return default: } if err != nil { level.Error(d.logger).Log("msg", "Error refreshing service list", "err", err) rpcFailuresCount.Inc() time.Sleep(retryInterval) return } // If the index equals the previous one, the watch timed out with no update. if meta.LastIndex == *lastIndex { return } *lastIndex = meta.LastIndex // Check for new services. for name := range srvs { // catalog.Service() returns a map of service name to tags, we can use that to watch // only the services that have the tag we are looking for (if specified). // In the future consul will also support server side for service metadata. // https://github.com/hashicorp/consul/issues/1107 if !d.shouldWatch(name, srvs[name]) { continue } if _, ok := services[name]; ok { continue // We are already watching the service. 
} wctx, cancel := context.WithCancel(ctx) d.watchService(wctx, ch, name) services[name] = cancel } // Check for removed services. for name, cancel := range services { if _, ok := srvs[name]; !ok { // Call the watch cancellation function. cancel() delete(services, name) // Send clearing target group. select { case <-ctx.Done(): return case ch <- []*targetgroup.Group{{Source: name}}: } } } } // consulService contains data belonging to the same service. type consulService struct { name string tags []string labels model.LabelSet discovery *Discovery client *consul.Client tagSeparator string logger log.Logger } // Start watching a service. func (d *Discovery) watchService(ctx context.Context, ch chan<- []*targetgroup.Group, name string) { srv := &consulService{ discovery: d, client: d.client, name: name, tags: d.watchedTags, labels: model.LabelSet{ serviceLabel: model.LabelValue(name), datacenterLabel: model.LabelValue(d.clientDatacenter), }, tagSeparator: d.tagSeparator, logger: d.logger, } go func() { ticker := time.NewTicker(d.refreshInterval) var lastIndex uint64 catalog := srv.client.Catalog() for { select { case <-ctx.Done(): ticker.Stop() return default: srv.watch(ctx, ch, catalog, &lastIndex) select { case <-ticker.C: case <-ctx.Done(): } } } }() } // Get updates for a service. func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Group, catalog *consul.Catalog, lastIndex *uint64) { level.Debug(srv.logger).Log("msg", "Watching service", "service", srv.name, "tags", srv.tags) t0 := time.Now() opts := &consul.QueryOptions{ WaitIndex: *lastIndex, WaitTime: watchTimeout, AllowStale: srv.discovery.allowStale, NodeMeta: srv.discovery.watchedNodeMeta, } nodes, meta, err := catalog.ServiceMultipleTags(srv.name, srv.tags, opts.WithContext(ctx)) elapsed := time.Since(t0) rpcDuration.WithLabelValues("catalog", "service").Observe(elapsed.Seconds()) // Check the context before in order to exit early. select { case <-ctx.Done(): return default: // Continue. } if err != nil { level.Error(srv.logger).Log("msg", "Error refreshing service", "service", srv.name, "tags", srv.tags, "err", err) rpcFailuresCount.Inc() time.Sleep(retryInterval) return } // If the index equals the previous one, the watch timed out with no update. if meta.LastIndex == *lastIndex { return } *lastIndex = meta.LastIndex tgroup := targetgroup.Group{ Source: srv.name, Labels: srv.labels, Targets: make([]model.LabelSet, 0, len(nodes)), } for _, node := range nodes { // We surround the separated list with the separator as well. This way regular expressions // in relabeling rules don't have to consider tag positions. var tags = srv.tagSeparator + strings.Join(node.ServiceTags, srv.tagSeparator) + srv.tagSeparator // If the service address is not empty it should be used instead of the node address // since the service may be registered remotely through a different node. 
var addr string if node.ServiceAddress != "" { addr = net.JoinHostPort(node.ServiceAddress, fmt.Sprintf("%d", node.ServicePort)) } else { addr = net.JoinHostPort(node.Address, fmt.Sprintf("%d", node.ServicePort)) } labels := model.LabelSet{ model.AddressLabel: model.LabelValue(addr), addressLabel: model.LabelValue(node.Address), nodeLabel: model.LabelValue(node.Node), tagsLabel: model.LabelValue(tags), serviceAddressLabel: model.LabelValue(node.ServiceAddress), servicePortLabel: model.LabelValue(strconv.Itoa(node.ServicePort)), serviceIDLabel: model.LabelValue(node.ServiceID), } // Add all key/value pairs from the node's metadata as their own labels. for k, v := range node.NodeMeta { name := strutil.SanitizeLabelName(k) labels[metaDataLabel+model.LabelName(name)] = model.LabelValue(v) } // Add all key/value pairs from the service's metadata as their own labels. for k, v := range node.ServiceMeta { name := strutil.SanitizeLabelName(k) labels[serviceMetaDataLabel+model.LabelName(name)] = model.LabelValue(v) } // Add all key/value pairs from the service's tagged addresses as their own labels. for k, v := range node.TaggedAddresses { name := strutil.SanitizeLabelName(k) labels[taggedAddressesLabel+model.LabelName(name)] = model.LabelValue(v) } tgroup.Targets = append(tgroup.Targets, labels) } select { case <-ctx.Done(): case ch <- []*targetgroup.Group{&tgroup}: } } prometheus-2.15.2+ds/discovery/consul/consul_test.go000066400000000000000000000236731360540074000226130ustar00rootroot00000000000000// Copyright 2015 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
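// The tests below drive the Consul discovery the same way the discovery
// manager does at runtime. A minimal usage sketch (assumptions: a reachable
// Consul agent address and illustrative service/tag names; the field names
// match those exercised by the tests in this file):
//
//	conf := &SDConfig{
//		Server:          "localhost:8500",
//		Services:        []string{"web"},
//		ServiceTags:     []string{"canary"},
//		RefreshInterval: model.Duration(30 * time.Second),
//	}
//	d, err := NewDiscovery(conf, log.NewNopLogger())
//	if err != nil {
//		// handle the error
//	}
//	ch := make(chan []*targetgroup.Group)
//	go d.Run(context.Background(), ch)
//	// Each update on ch carries the current targets for one watched service.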
package consul import ( "context" "testing" "time" "net/http" "net/http/httptest" "net/url" "github.com/go-kit/kit/log" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/util/testutil" ) func TestConfiguredService(t *testing.T) { conf := &SDConfig{ Services: []string{"configuredServiceName"}} consulDiscovery, err := NewDiscovery(conf, nil) if err != nil { t.Errorf("Unexpected error when initializing discovery %v", err) } if !consulDiscovery.shouldWatch("configuredServiceName", []string{""}) { t.Errorf("Expected service %s to be watched", "configuredServiceName") } if consulDiscovery.shouldWatch("nonConfiguredServiceName", []string{""}) { t.Errorf("Expected service %s to not be watched", "nonConfiguredServiceName") } } func TestConfiguredServiceWithTag(t *testing.T) { conf := &SDConfig{ Services: []string{"configuredServiceName"}, ServiceTags: []string{"http"}, } consulDiscovery, err := NewDiscovery(conf, nil) if err != nil { t.Errorf("Unexpected error when initializing discovery %v", err) } if consulDiscovery.shouldWatch("configuredServiceName", []string{""}) { t.Errorf("Expected service %s to not be watched without tag", "configuredServiceName") } if !consulDiscovery.shouldWatch("configuredServiceName", []string{"http"}) { t.Errorf("Expected service %s to be watched with tag %s", "configuredServiceName", "http") } if consulDiscovery.shouldWatch("nonConfiguredServiceName", []string{""}) { t.Errorf("Expected service %s to not be watched without tag", "nonConfiguredServiceName") } if consulDiscovery.shouldWatch("nonConfiguredServiceName", []string{"http"}) { t.Errorf("Expected service %s to not be watched with tag %s", "nonConfiguredServiceName", "http") } } func TestConfiguredServiceWithTags(t *testing.T) { type testcase struct { // What we've configured to watch. conf *SDConfig // The service we're checking if we should watch or not. 
serviceName string serviceTags []string shouldWatch bool } cases := []testcase{ { conf: &SDConfig{ Services: []string{"configuredServiceName"}, ServiceTags: []string{"http", "v1"}, }, serviceName: "configuredServiceName", serviceTags: []string{""}, shouldWatch: false, }, { conf: &SDConfig{ Services: []string{"configuredServiceName"}, ServiceTags: []string{"http", "v1"}, }, serviceName: "configuredServiceName", serviceTags: []string{"http", "v1"}, shouldWatch: true, }, { conf: &SDConfig{ Services: []string{"configuredServiceName"}, ServiceTags: []string{"http", "v1"}, }, serviceName: "nonConfiguredServiceName", serviceTags: []string{""}, shouldWatch: false, }, { conf: &SDConfig{ Services: []string{"configuredServiceName"}, ServiceTags: []string{"http", "v1"}, }, serviceName: "nonConfiguredServiceName", serviceTags: []string{"http, v1"}, shouldWatch: false, }, { conf: &SDConfig{ Services: []string{"configuredServiceName"}, ServiceTags: []string{"http", "v1"}, }, serviceName: "configuredServiceName", serviceTags: []string{"http", "v1", "foo"}, shouldWatch: true, }, { conf: &SDConfig{ Services: []string{"configuredServiceName"}, ServiceTags: []string{"http", "v1", "foo"}, }, serviceName: "configuredServiceName", serviceTags: []string{"http", "v1", "foo"}, shouldWatch: true, }, { conf: &SDConfig{ Services: []string{"configuredServiceName"}, ServiceTags: []string{"http", "v1"}, }, serviceName: "configuredServiceName", serviceTags: []string{"http", "v1", "v1"}, shouldWatch: true, }, } for _, tc := range cases { consulDiscovery, err := NewDiscovery(tc.conf, nil) if err != nil { t.Errorf("Unexpected error when initializing discovery %v", err) } ret := consulDiscovery.shouldWatch(tc.serviceName, tc.serviceTags) if ret != tc.shouldWatch { t.Errorf("Expected should watch? %t, got %t. Watched service and tags: %s %+v, input was %s %+v", tc.shouldWatch, ret, tc.conf.Services, tc.conf.ServiceTags, tc.serviceName, tc.serviceTags) } } } func TestNonConfiguredService(t *testing.T) { conf := &SDConfig{} consulDiscovery, err := NewDiscovery(conf, nil) if err != nil { t.Errorf("Unexpected error when initializing discovery %v", err) } if !consulDiscovery.shouldWatch("nonConfiguredServiceName", []string{""}) { t.Errorf("Expected service %s to be watched", "nonConfiguredServiceName") } } const ( AgentAnswer = `{"Config": {"Datacenter": "test-dc"}}` ServiceTestAnswer = `[{ "ID": "b78c2e48-5ef3-1814-31b8-0d880f50471e", "Node": "node1", "Address": "1.1.1.1", "Datacenter": "test-dc", "TaggedAddresses": {"lan":"192.168.10.10","wan":"10.0.10.10"}, "NodeMeta": {"rack_name": "2304"}, "ServiceID": "test", "ServiceName": "test", "ServiceMeta": {"version":"1.0.0","environment":"stagging"}, "ServiceTags": ["tag1"], "ServicePort": 3341, "CreateIndex": 1, "ModifyIndex": 1 }]` ServicesTestAnswer = `{"test": ["tag1"], "other": ["tag2"]}` ) func newServer(t *testing.T) (*httptest.Server, *SDConfig) { // github.com/hashicorp/consul/testutil/ would be nice but it needs a local consul binary. 
stub := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { response := "" switch r.URL.String() { case "/v1/agent/self": response = AgentAnswer case "/v1/catalog/service/test?node-meta=rack_name%3A2304&stale=&tag=tag1&wait=30000ms": response = ServiceTestAnswer case "/v1/catalog/service/test?wait=30000ms": response = ServiceTestAnswer case "/v1/catalog/service/other?wait=30000ms": response = `[]` case "/v1/catalog/services?node-meta=rack_name%3A2304&stale=&wait=30000ms": response = ServicesTestAnswer case "/v1/catalog/services?wait=30000ms": response = ServicesTestAnswer case "/v1/catalog/services?index=1&node-meta=rack_name%3A2304&stale=&wait=30000ms": time.Sleep(5 * time.Second) response = ServicesTestAnswer case "/v1/catalog/services?index=1&wait=30000ms": time.Sleep(5 * time.Second) response = ServicesTestAnswer default: t.Errorf("Unhandled consul call: %s", r.URL) } w.Header().Add("X-Consul-Index", "1") w.Write([]byte(response)) })) stuburl, err := url.Parse(stub.URL) testutil.Ok(t, err) config := &SDConfig{ Server: stuburl.Host, Token: "fake-token", RefreshInterval: model.Duration(1 * time.Second), } return stub, config } func newDiscovery(t *testing.T, config *SDConfig) *Discovery { logger := log.NewNopLogger() d, err := NewDiscovery(config, logger) testutil.Ok(t, err) return d } func checkOneTarget(t *testing.T, tg []*targetgroup.Group) { testutil.Equals(t, 1, len(tg)) target := tg[0] testutil.Equals(t, "test-dc", string(target.Labels["__meta_consul_dc"])) testutil.Equals(t, target.Source, string(target.Labels["__meta_consul_service"])) if target.Source == "test" { // test service should have one node. testutil.Assert(t, len(target.Targets) > 0, "Test service should have one node") } } // Watch all the services in the catalog. func TestAllServices(t *testing.T) { stub, config := newServer(t) defer stub.Close() d := newDiscovery(t, config) ctx, cancel := context.WithCancel(context.Background()) ch := make(chan []*targetgroup.Group) go d.Run(ctx, ch) checkOneTarget(t, <-ch) checkOneTarget(t, <-ch) cancel() } // Watch only the test service. func TestOneService(t *testing.T) { stub, config := newServer(t) defer stub.Close() config.Services = []string{"test"} d := newDiscovery(t, config) ctx, cancel := context.WithCancel(context.Background()) ch := make(chan []*targetgroup.Group) go d.Run(ctx, ch) checkOneTarget(t, <-ch) cancel() } // Watch the test service with a specific tag and node-meta. func TestAllOptions(t *testing.T) { stub, config := newServer(t) defer stub.Close() config.Services = []string{"test"} config.NodeMeta = map[string]string{"rack_name": "2304"} config.ServiceTags = []string{"tag1"} config.AllowStale = true config.Token = "fake-token" d := newDiscovery(t, config) ctx, cancel := context.WithCancel(context.Background()) ch := make(chan []*targetgroup.Group) go d.Run(ctx, ch) checkOneTarget(t, <-ch) cancel() } func TestGetDatacenterShouldReturnError(t *testing.T) { for _, tc := range []struct { handler func(http.ResponseWriter, *http.Request) errMessage string }{ { // Define a handler that will return status 500. handler: func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(500) }, errMessage: "Unexpected response code: 500 ()", }, { // Define a handler that will return incorrect response. 
handler: func(w http.ResponseWriter, r *http.Request) { w.Write([]byte(`{"Config": {"Not-Datacenter": "test-dc"}}`)) }, errMessage: "invalid value '' for Config.Datacenter", }, } { stub := httptest.NewServer(http.HandlerFunc(tc.handler)) stuburl, err := url.Parse(stub.URL) testutil.Ok(t, err) config := &SDConfig{ Server: stuburl.Host, Token: "fake-token", RefreshInterval: model.Duration(1 * time.Second), } defer stub.Close() d := newDiscovery(t, config) // Should be empty if not initialized. testutil.Equals(t, "", d.clientDatacenter) err = d.getDatacenter() // An error should be returned. testutil.Equals(t, tc.errMessage, err.Error()) // Should still be empty. testutil.Equals(t, "", d.clientDatacenter) } } prometheus-2.15.2+ds/discovery/dns/000077500000000000000000000000001360540074000171705ustar00rootroot00000000000000prometheus-2.15.2+ds/discovery/dns/dns.go000066400000000000000000000241551360540074000203120ustar00rootroot00000000000000// Copyright 2016 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package dns import ( "context" "fmt" "net" "strings" "sync" "time" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" "github.com/miekg/dns" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/refresh" "github.com/prometheus/prometheus/discovery/targetgroup" ) const ( resolvConf = "/etc/resolv.conf" dnsNameLabel = model.MetaLabelPrefix + "dns_name" // Constants for instrumentation. namespace = "prometheus" ) var ( dnsSDLookupsCount = prometheus.NewCounter( prometheus.CounterOpts{ Namespace: namespace, Name: "sd_dns_lookups_total", Help: "The number of DNS-SD lookups.", }) dnsSDLookupFailuresCount = prometheus.NewCounter( prometheus.CounterOpts{ Namespace: namespace, Name: "sd_dns_lookup_failures_total", Help: "The number of DNS-SD lookup failures.", }) // DefaultSDConfig is the default DNS SD configuration. DefaultSDConfig = SDConfig{ RefreshInterval: model.Duration(30 * time.Second), Type: "SRV", } ) // SDConfig is the configuration for DNS based service discovery. type SDConfig struct { Names []string `yaml:"names"` RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` Type string `yaml:"type"` Port int `yaml:"port"` // Ignored for SRV records } // UnmarshalYAML implements the yaml.Unmarshaler interface. 
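// The validation below requires at least one name, and an explicit port for
// the A and AAAA record types (SRV records carry their own port). A minimal
// configuration that passes it, shown as a sketch with illustrative values:
//
//	SDConfig{
//		Names:           []string{"web.example.com."},
//		Type:            "A",
//		Port:            80,
//		RefreshInterval: model.Duration(30 * time.Second),
//	}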
func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = DefaultSDConfig type plain SDConfig err := unmarshal((*plain)(c)) if err != nil { return err } if len(c.Names) == 0 { return errors.New("DNS-SD config must contain at least one SRV record name") } switch strings.ToUpper(c.Type) { case "SRV": case "A", "AAAA": if c.Port == 0 { return errors.New("a port is required in DNS-SD configs for all record types except SRV") } default: return errors.Errorf("invalid DNS-SD records type %s", c.Type) } return nil } func init() { prometheus.MustRegister(dnsSDLookupFailuresCount) prometheus.MustRegister(dnsSDLookupsCount) } // Discovery periodically performs DNS-SD requests. It implements // the Discoverer interface. type Discovery struct { *refresh.Discovery names []string port int qtype uint16 logger log.Logger lookupFn func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) } // NewDiscovery returns a new Discovery which periodically refreshes its targets. func NewDiscovery(conf SDConfig, logger log.Logger) *Discovery { if logger == nil { logger = log.NewNopLogger() } qtype := dns.TypeSRV switch strings.ToUpper(conf.Type) { case "A": qtype = dns.TypeA case "AAAA": qtype = dns.TypeAAAA case "SRV": qtype = dns.TypeSRV } d := &Discovery{ names: conf.Names, qtype: qtype, port: conf.Port, logger: logger, lookupFn: lookupWithSearchPath, } d.Discovery = refresh.NewDiscovery( logger, "dns", time.Duration(conf.RefreshInterval), d.refresh, ) return d } func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { var ( wg sync.WaitGroup ch = make(chan *targetgroup.Group) tgs = make([]*targetgroup.Group, 0, len(d.names)) ) wg.Add(len(d.names)) for _, name := range d.names { go func(n string) { if err := d.refreshOne(ctx, n, ch); err != nil && err != context.Canceled { level.Error(d.logger).Log("msg", "Error refreshing DNS targets", "err", err) } wg.Done() }(name) } go func() { wg.Wait() close(ch) }() for tg := range ch { tgs = append(tgs, tg) } return tgs, nil } func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targetgroup.Group) error { response, err := d.lookupFn(name, d.qtype, d.logger) dnsSDLookupsCount.Inc() if err != nil { dnsSDLookupFailuresCount.Inc() return err } tg := &targetgroup.Group{} hostPort := func(a string, p int) model.LabelValue { return model.LabelValue(net.JoinHostPort(a, fmt.Sprintf("%d", p))) } for _, record := range response.Answer { var target model.LabelValue switch addr := record.(type) { case *dns.SRV: // Remove the final dot from rooted DNS names to make them look more usual. addr.Target = strings.TrimRight(addr.Target, ".") target = hostPort(addr.Target, int(addr.Port)) case *dns.A: target = hostPort(addr.A.String(), d.port) case *dns.AAAA: target = hostPort(addr.AAAA.String(), d.port) default: level.Warn(d.logger).Log("msg", "Invalid SRV record", "record", record) continue } tg.Targets = append(tg.Targets, model.LabelSet{ model.AddressLabel: target, dnsNameLabel: model.LabelValue(name), }) } tg.Source = name select { case <-ctx.Done(): return ctx.Err() case ch <- tg: } return nil } // lookupWithSearchPath tries to get an answer for various permutations of // the given name, appending the system-configured search path as necessary. // // There are three possible outcomes: // // 1. One of the permutations of the given name is recognized as // "valid" by the DNS, in which case we consider ourselves "done" // and that answer is returned. 
Note that, due to the way the DNS // handles "name has resource records, but none of the specified type", // the answer received may have an empty set of results. // // 2. All of the permutations of the given name are responded to by one of // the servers in the "nameservers" list with the answer "that name does // not exist" (NXDOMAIN). In that case, it can be considered // pseudo-authoritative that there are no records for that name. // // 3. One or more of the names was responded to by all servers with some // sort of error indication. In that case, we can't know if, in fact, // there are records for the name or not, so whatever state the // configuration is in, we should keep it that way until we know for // sure (by, presumably, all the names getting answers in the future). // // Outcomes 1 and 2 are indicated by a valid response message (possibly an // empty one) and no error. Outcome 3 is indicated by an error return. The // error will be generic-looking, because trying to return all the errors // returned by the combination of all name permutations and servers is a // nightmare. func lookupWithSearchPath(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { conf, err := dns.ClientConfigFromFile(resolvConf) if err != nil { return nil, errors.Wrap(err, "could not load resolv.conf") } allResponsesValid := true for _, lname := range conf.NameList(name) { response, err := lookupFromAnyServer(lname, qtype, conf, logger) if err != nil { // We can't go home yet, because a later name // may give us a valid, successful answer. However // we can no longer say "this name definitely doesn't // exist", because we did not get that answer for // at least one name. allResponsesValid = false } else if response.Rcode == dns.RcodeSuccess { // Outcome 1: GOLD! return response, nil } } if allResponsesValid { // Outcome 2: everyone says NXDOMAIN, that's good enough for me return &dns.Msg{}, nil } // Outcome 3: boned. return nil, errors.Errorf("could not resolve %q: all servers responded with errors to at least one search domain", name) } // lookupFromAnyServer uses all configured servers to try and resolve a specific // name. If a viable answer is received from a server, then it is // immediately returned, otherwise the other servers in the config are // tried, and if none of them return a viable answer, an error is returned. // // A "viable answer" is one which indicates either: // // 1. "yes, I know that name, and here are its records of the requested type" // (RCODE==SUCCESS, ANCOUNT > 0); // 2. "yes, I know that name, but it has no records of the requested type" // (RCODE==SUCCESS, ANCOUNT==0); or // 3. "I know that name doesn't exist" (RCODE==NXDOMAIN). // // A non-viable answer is "anything else", which encompasses both various // system-level problems (like network timeouts) and also // valid-but-unexpected DNS responses (SERVFAIL, REFUSED, etc). func lookupFromAnyServer(name string, qtype uint16, conf *dns.ClientConfig, logger log.Logger) (*dns.Msg, error) { client := &dns.Client{} for _, server := range conf.Servers { servAddr := net.JoinHostPort(server, conf.Port) msg, err := askServerForName(name, qtype, client, servAddr, true) if err != nil { level.Warn(logger).Log("msg", "DNS resolution failed", "server", server, "name", name, "err", err) continue } if msg.Rcode == dns.RcodeSuccess || msg.Rcode == dns.RcodeNameError { // We have our answer. Time to go home. 
return msg, nil } } return nil, errors.Errorf("could not resolve %s: no servers returned a viable answer", name) } // askServerForName makes a request to a specific DNS server for a specific // name (and qtype). Retries with TCP in the event of response truncation, // but otherwise just sends back whatever the server gave, whether that be a // valid-looking response, or an error. func askServerForName(name string, queryType uint16, client *dns.Client, servAddr string, edns bool) (*dns.Msg, error) { msg := &dns.Msg{} msg.SetQuestion(dns.Fqdn(name), queryType) if edns { msg.SetEdns0(dns.DefaultMsgSize, false) } response, _, err := client.Exchange(msg, servAddr) if err != nil { return nil, err } if response.Truncated { if client.Net == "tcp" { return nil, errors.New("got truncated message on TCP (64kiB limit exceeded?)") } client.Net = "tcp" return askServerForName(name, queryType, client, servAddr, false) } return response, nil } prometheus-2.15.2+ds/discovery/dns/dns_test.go000066400000000000000000000150101360540074000213370ustar00rootroot00000000000000// Copyright 2019 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package dns import ( "context" "fmt" "net" "testing" "time" "github.com/go-kit/kit/log" "github.com/miekg/dns" "gopkg.in/yaml.v2" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/util/testutil" ) func TestDNS(t *testing.T) { testCases := []struct { name string config SDConfig lookup func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) expected []*targetgroup.Group }{ { name: "A record query with error", config: SDConfig{ Names: []string{"web.example.com."}, RefreshInterval: model.Duration(time.Minute), Port: 80, Type: "A", }, lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { return nil, fmt.Errorf("some error") }, expected: []*targetgroup.Group{}, }, { name: "A record query", config: SDConfig{ Names: []string{"web.example.com."}, RefreshInterval: model.Duration(time.Minute), Port: 80, Type: "A", }, lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { return &dns.Msg{ Answer: []dns.RR{ &dns.A{A: net.IPv4(192, 0, 2, 2)}, }, }, nil }, expected: []*targetgroup.Group{ { Source: "web.example.com.", Targets: []model.LabelSet{ {"__address__": "192.0.2.2:80", "__meta_dns_name": "web.example.com."}, }, }, }, }, { name: "AAAA record query", config: SDConfig{ Names: []string{"web.example.com."}, RefreshInterval: model.Duration(time.Minute), Port: 80, Type: "AAAA", }, lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { return &dns.Msg{ Answer: []dns.RR{ &dns.AAAA{AAAA: net.IPv6loopback}, }, }, nil }, expected: []*targetgroup.Group{ { Source: "web.example.com.", Targets: []model.LabelSet{ {"__address__": "[::1]:80", "__meta_dns_name": "web.example.com."}, }, }, }, }, { name: "SRV record query", config: SDConfig{ Names: []string{"_mysql._tcp.db.example.com."}, Type: "SRV", RefreshInterval: 
model.Duration(time.Minute), }, lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { return &dns.Msg{ Answer: []dns.RR{ &dns.SRV{Port: 3306, Target: "db1.example.com."}, &dns.SRV{Port: 3306, Target: "db2.example.com."}, }, }, nil }, expected: []*targetgroup.Group{ { Source: "_mysql._tcp.db.example.com.", Targets: []model.LabelSet{ {"__address__": "db1.example.com:3306", "__meta_dns_name": "_mysql._tcp.db.example.com."}, {"__address__": "db2.example.com:3306", "__meta_dns_name": "_mysql._tcp.db.example.com."}, }, }, }, }, { name: "SRV record query with unsupported resource records", config: SDConfig{ Names: []string{"_mysql._tcp.db.example.com."}, RefreshInterval: model.Duration(time.Minute), }, lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { return &dns.Msg{ Answer: []dns.RR{ &dns.SRV{Port: 3306, Target: "db1.example.com."}, &dns.TXT{Txt: []string{"this should be discarded"}}, }, }, nil }, expected: []*targetgroup.Group{ { Source: "_mysql._tcp.db.example.com.", Targets: []model.LabelSet{ {"__address__": "db1.example.com:3306", "__meta_dns_name": "_mysql._tcp.db.example.com."}, }, }, }, }, { name: "SRV record query with empty answer (NXDOMAIN)", config: SDConfig{ Names: []string{"_mysql._tcp.db.example.com."}, RefreshInterval: model.Duration(time.Minute), }, lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { return &dns.Msg{}, nil }, expected: []*targetgroup.Group{ { Source: "_mysql._tcp.db.example.com.", }, }, }, } for _, tc := range testCases { tc := tc t.Run(tc.name, func(t *testing.T) { t.Parallel() sd := NewDiscovery(tc.config, nil) sd.lookupFn = tc.lookup tgs, err := sd.refresh(context.Background()) testutil.Ok(t, err) testutil.Equals(t, tc.expected, tgs) }) } } func TestSDConfigUnmarshalYAML(t *testing.T) { marshal := func(c SDConfig) []byte { d, err := yaml.Marshal(c) if err != nil { panic(err) } return d } unmarshal := func(d []byte) func(interface{}) error { return func(o interface{}) error { return yaml.Unmarshal(d, o) } } cases := []struct { name string input SDConfig expectErr bool }{ { name: "valid srv", input: SDConfig{ Names: []string{"a.example.com", "b.example.com"}, Type: "SRV", }, expectErr: false, }, { name: "valid a", input: SDConfig{ Names: []string{"a.example.com", "b.example.com"}, Type: "A", Port: 5300, }, expectErr: false, }, { name: "valid aaaa", input: SDConfig{ Names: []string{"a.example.com", "b.example.com"}, Type: "AAAA", Port: 5300, }, expectErr: false, }, { name: "invalid a without port", input: SDConfig{ Names: []string{"a.example.com", "b.example.com"}, Type: "A", }, expectErr: true, }, { name: "invalid aaaa without port", input: SDConfig{ Names: []string{"a.example.com", "b.example.com"}, Type: "AAAA", }, expectErr: true, }, { name: "invalid empty names", input: SDConfig{ Names: []string{}, Type: "AAAA", }, expectErr: true, }, { name: "invalid unknown dns type", input: SDConfig{ Names: []string{"a.example.com", "b.example.com"}, Type: "PTR", }, expectErr: true, }, } for _, c := range cases { t.Run(c.name, func(t *testing.T) { var config SDConfig d := marshal(c.input) err := config.UnmarshalYAML(unmarshal(d)) testutil.Equals(t, c.expectErr, err != nil) }) } } prometheus-2.15.2+ds/discovery/ec2/000077500000000000000000000000001360540074000170555ustar00rootroot00000000000000prometheus-2.15.2+ds/discovery/ec2/ec2.go000066400000000000000000000173251360540074000200650ustar00rootroot00000000000000// Copyright 2015 The Prometheus Authors // Licensed under the Apache License, 
Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package ec2 import ( "context" "fmt" "net" "strings" "time" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/credentials" "github.com/aws/aws-sdk-go/aws/credentials/stscreds" "github.com/aws/aws-sdk-go/aws/ec2metadata" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/ec2" "github.com/go-kit/kit/log" "github.com/pkg/errors" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/refresh" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/util/strutil" ) const ( ec2Label = model.MetaLabelPrefix + "ec2_" ec2LabelAZ = ec2Label + "availability_zone" ec2LabelInstanceID = ec2Label + "instance_id" ec2LabelInstanceState = ec2Label + "instance_state" ec2LabelInstanceType = ec2Label + "instance_type" ec2LabelOwnerID = ec2Label + "owner_id" ec2LabelPlatform = ec2Label + "platform" ec2LabelPublicDNS = ec2Label + "public_dns_name" ec2LabelPublicIP = ec2Label + "public_ip" ec2LabelPrivateDNS = ec2Label + "private_dns_name" ec2LabelPrivateIP = ec2Label + "private_ip" ec2LabelPrimarySubnetID = ec2Label + "primary_subnet_id" ec2LabelSubnetID = ec2Label + "subnet_id" ec2LabelTag = ec2Label + "tag_" ec2LabelVPCID = ec2Label + "vpc_id" subnetSeparator = "," ) // DefaultSDConfig is the default EC2 SD configuration. var DefaultSDConfig = SDConfig{ Port: 80, RefreshInterval: model.Duration(60 * time.Second), } // Filter is the configuration for filtering EC2 instances. type Filter struct { Name string `yaml:"name"` Values []string `yaml:"values"` } // SDConfig is the configuration for EC2 based service discovery. type SDConfig struct { Endpoint string `yaml:"endpoint"` Region string `yaml:"region"` AccessKey string `yaml:"access_key,omitempty"` SecretKey config_util.Secret `yaml:"secret_key,omitempty"` Profile string `yaml:"profile,omitempty"` RoleARN string `yaml:"role_arn,omitempty"` RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` Port int `yaml:"port"` Filters []*Filter `yaml:"filters"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = DefaultSDConfig type plain SDConfig err := unmarshal((*plain)(c)) if err != nil { return err } if c.Region == "" { sess, err := session.NewSession() if err != nil { return err } metadata := ec2metadata.New(sess) region, err := metadata.Region() if err != nil { return errors.New("EC2 SD configuration requires a region") } c.Region = region } for _, f := range c.Filters { if len(f.Values) == 0 { return errors.New("EC2 SD configuration filter values cannot be empty") } } return nil } // Discovery periodically performs EC2-SD requests. It implements // the Discoverer interface. type Discovery struct { *refresh.Discovery aws *aws.Config interval time.Duration profile string roleARN string port int filters []*Filter } // NewDiscovery returns a new EC2Discovery which periodically refreshes its targets. 
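// A minimal sketch of an SDConfig this constructor accepts (the values and the
// filter name are illustrative; the field names are the ones defined above):
//
//	conf := &SDConfig{
//		Region:          "eu-west-1",
//		Port:            9100,
//		RefreshInterval: model.Duration(60 * time.Second),
//		Filters: []*Filter{
//			{Name: "tag:Environment", Values: []string{"production"}},
//		},
//	}
//	d := NewDiscovery(conf, log.NewNopLogger())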
func NewDiscovery(conf *SDConfig, logger log.Logger) *Discovery { creds := credentials.NewStaticCredentials(conf.AccessKey, string(conf.SecretKey), "") if conf.AccessKey == "" && conf.SecretKey == "" { creds = nil } if logger == nil { logger = log.NewNopLogger() } d := &Discovery{ aws: &aws.Config{ Endpoint: &conf.Endpoint, Region: &conf.Region, Credentials: creds, }, profile: conf.Profile, roleARN: conf.RoleARN, filters: conf.Filters, interval: time.Duration(conf.RefreshInterval), port: conf.Port, } d.Discovery = refresh.NewDiscovery( logger, "ec2", time.Duration(conf.RefreshInterval), d.refresh, ) return d } func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { sess, err := session.NewSessionWithOptions(session.Options{ Config: *d.aws, Profile: d.profile, }) if err != nil { return nil, errors.Wrap(err, "could not create aws session") } var ec2s *ec2.EC2 if d.roleARN != "" { creds := stscreds.NewCredentials(sess, d.roleARN) ec2s = ec2.New(sess, &aws.Config{Credentials: creds}) } else { ec2s = ec2.New(sess) } tg := &targetgroup.Group{ Source: *d.aws.Region, } var filters []*ec2.Filter for _, f := range d.filters { filters = append(filters, &ec2.Filter{ Name: aws.String(f.Name), Values: aws.StringSlice(f.Values), }) } input := &ec2.DescribeInstancesInput{Filters: filters} if err = ec2s.DescribeInstancesPagesWithContext(ctx, input, func(p *ec2.DescribeInstancesOutput, lastPage bool) bool { for _, r := range p.Reservations { for _, inst := range r.Instances { if inst.PrivateIpAddress == nil { continue } labels := model.LabelSet{ ec2LabelInstanceID: model.LabelValue(*inst.InstanceId), } if r.OwnerId != nil { labels[ec2LabelOwnerID] = model.LabelValue(*r.OwnerId) } labels[ec2LabelPrivateIP] = model.LabelValue(*inst.PrivateIpAddress) if inst.PrivateDnsName != nil { labels[ec2LabelPrivateDNS] = model.LabelValue(*inst.PrivateDnsName) } addr := net.JoinHostPort(*inst.PrivateIpAddress, fmt.Sprintf("%d", d.port)) labels[model.AddressLabel] = model.LabelValue(addr) if inst.Platform != nil { labels[ec2LabelPlatform] = model.LabelValue(*inst.Platform) } if inst.PublicIpAddress != nil { labels[ec2LabelPublicIP] = model.LabelValue(*inst.PublicIpAddress) labels[ec2LabelPublicDNS] = model.LabelValue(*inst.PublicDnsName) } labels[ec2LabelAZ] = model.LabelValue(*inst.Placement.AvailabilityZone) labels[ec2LabelInstanceState] = model.LabelValue(*inst.State.Name) labels[ec2LabelInstanceType] = model.LabelValue(*inst.InstanceType) if inst.VpcId != nil { labels[ec2LabelVPCID] = model.LabelValue(*inst.VpcId) labels[ec2LabelPrimarySubnetID] = model.LabelValue(*inst.SubnetId) // Deduplicate VPC Subnet IDs maintaining the order of the network interfaces returned by EC2. 
var subnets []string subnetsMap := make(map[string]struct{}) for _, eni := range inst.NetworkInterfaces { if eni.SubnetId == nil { continue } if _, ok := subnetsMap[*eni.SubnetId]; !ok { subnetsMap[*eni.SubnetId] = struct{}{} subnets = append(subnets, *eni.SubnetId) } } labels[ec2LabelSubnetID] = model.LabelValue( subnetSeparator + strings.Join(subnets, subnetSeparator) + subnetSeparator) } for _, t := range inst.Tags { if t == nil || t.Key == nil || t.Value == nil { continue } name := strutil.SanitizeLabelName(*t.Key) labels[ec2LabelTag+model.LabelName(name)] = model.LabelValue(*t.Value) } tg.Targets = append(tg.Targets, labels) } } return true }); err != nil { return nil, errors.Wrap(err, "could not describe instances") } return []*targetgroup.Group{tg}, nil } prometheus-2.15.2+ds/discovery/file/000077500000000000000000000000001360540074000173235ustar00rootroot00000000000000prometheus-2.15.2+ds/discovery/file/file.go000066400000000000000000000254041360540074000205760ustar00rootroot00000000000000// Copyright 2015 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package file import ( "context" "encoding/json" "fmt" "io/ioutil" "os" "path/filepath" "regexp" "strings" "sync" "time" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" fsnotify "gopkg.in/fsnotify/fsnotify.v1" yaml "gopkg.in/yaml.v2" "github.com/prometheus/prometheus/discovery/targetgroup" ) var ( patFileSDName = regexp.MustCompile(`^[^*]*(\*[^/]*)?\.(json|yml|yaml|JSON|YML|YAML)$`) // DefaultSDConfig is the default file SD configuration. DefaultSDConfig = SDConfig{ RefreshInterval: model.Duration(5 * time.Minute), } ) // SDConfig is the configuration for file based discovery. type SDConfig struct { Files []string `yaml:"files"` RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = DefaultSDConfig type plain SDConfig err := unmarshal((*plain)(c)) if err != nil { return err } if len(c.Files) == 0 { return errors.New("file service discovery config must contain at least one path name") } for _, name := range c.Files { if !patFileSDName.MatchString(name) { return errors.Errorf("path name %q is not valid for file discovery", name) } } return nil } const fileSDFilepathLabel = model.MetaLabelPrefix + "filepath" // TimestampCollector is a Custom Collector for Timestamps of the files. type TimestampCollector struct { Description *prometheus.Desc discoverers map[*Discovery]struct{} lock sync.RWMutex } // Describe method sends the description to the channel. func (t *TimestampCollector) Describe(ch chan<- *prometheus.Desc) { ch <- t.Description } // Collect creates constant metrics for each file with last modified time of the file. func (t *TimestampCollector) Collect(ch chan<- prometheus.Metric) { // New map to dedup filenames. 
uniqueFiles := make(map[string]float64) t.lock.RLock() for fileSD := range t.discoverers { fileSD.lock.RLock() for filename, timestamp := range fileSD.timestamps { uniqueFiles[filename] = timestamp } fileSD.lock.RUnlock() } t.lock.RUnlock() for filename, timestamp := range uniqueFiles { ch <- prometheus.MustNewConstMetric( t.Description, prometheus.GaugeValue, timestamp, filename, ) } } func (t *TimestampCollector) addDiscoverer(disc *Discovery) { t.lock.Lock() t.discoverers[disc] = struct{}{} t.lock.Unlock() } func (t *TimestampCollector) removeDiscoverer(disc *Discovery) { t.lock.Lock() delete(t.discoverers, disc) t.lock.Unlock() } // NewTimestampCollector creates a TimestampCollector. func NewTimestampCollector() *TimestampCollector { return &TimestampCollector{ Description: prometheus.NewDesc( "prometheus_sd_file_mtime_seconds", "Timestamp (mtime) of files read by FileSD. Timestamp is set at read time.", []string{"filename"}, nil, ), discoverers: make(map[*Discovery]struct{}), } } var ( fileSDScanDuration = prometheus.NewSummary( prometheus.SummaryOpts{ Name: "prometheus_sd_file_scan_duration_seconds", Help: "The duration of the File-SD scan in seconds.", Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, }) fileSDReadErrorsCount = prometheus.NewCounter( prometheus.CounterOpts{ Name: "prometheus_sd_file_read_errors_total", Help: "The number of File-SD read errors.", }) fileSDTimeStamp = NewTimestampCollector() ) func init() { prometheus.MustRegister(fileSDScanDuration) prometheus.MustRegister(fileSDReadErrorsCount) prometheus.MustRegister(fileSDTimeStamp) } // Discovery provides service discovery functionality based // on files that contain target groups in JSON or YAML format. Refreshing // happens using file watches and periodic refreshes. type Discovery struct { paths []string watcher *fsnotify.Watcher interval time.Duration timestamps map[string]float64 lock sync.RWMutex // lastRefresh stores which files were found during the last refresh // and how many target groups they contained. // This is used to detect deleted target groups. lastRefresh map[string]int logger log.Logger } // NewDiscovery returns a new file discovery for the given paths. func NewDiscovery(conf *SDConfig, logger log.Logger) *Discovery { if logger == nil { logger = log.NewNopLogger() } disc := &Discovery{ paths: conf.Files, interval: time.Duration(conf.RefreshInterval), timestamps: make(map[string]float64), logger: logger, } fileSDTimeStamp.addDiscoverer(disc) return disc } // listFiles returns a list of all files that match the configured patterns. func (d *Discovery) listFiles() []string { var paths []string for _, p := range d.paths { files, err := filepath.Glob(p) if err != nil { level.Error(d.logger).Log("msg", "Error expanding glob", "glob", p, "err", err) continue } paths = append(paths, files...) } return paths } // watchFiles sets watches on all full paths or directories that were configured for // this file discovery. func (d *Discovery) watchFiles() { if d.watcher == nil { panic("no watcher configured") } for _, p := range d.paths { if idx := strings.LastIndex(p, "/"); idx > -1 { p = p[:idx] } else { p = "./" } if err := d.watcher.Add(p); err != nil { level.Error(d.logger).Log("msg", "Error adding file watch", "path", p, "err", err) } } } // Run implements the Discoverer interface. 
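// A minimal usage sketch (the glob pattern is illustrative; this mirrors how
// the discovery manager drives the component):
//
//	d := NewDiscovery(&SDConfig{
//		Files:           []string{"/etc/prometheus/targets/*.yml"},
//		RefreshInterval: model.Duration(5 * time.Minute),
//	}, log.NewNopLogger())
//	ch := make(chan []*targetgroup.Group)
//	go d.Run(context.Background(), ch)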
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { watcher, err := fsnotify.NewWatcher() if err != nil { level.Error(d.logger).Log("msg", "Error adding file watcher", "err", err) return } d.watcher = watcher defer d.stop() d.refresh(ctx, ch) ticker := time.NewTicker(d.interval) defer ticker.Stop() for { select { case <-ctx.Done(): return case event := <-d.watcher.Events: // fsnotify sometimes sends a bunch of events without name or operation. // It's unclear what they are and why they are sent - filter them out. if len(event.Name) == 0 { break } // Everything but a chmod requires rereading. if event.Op^fsnotify.Chmod == 0 { break } // Changes to a file can spawn various sequences of events with // different combinations of operations. For all practical purposes // this is inaccurate. // The most reliable solution is to reload everything if anything happens. d.refresh(ctx, ch) case <-ticker.C: // Setting a new watch after an update might fail. Make sure we don't lose // those files forever. d.refresh(ctx, ch) case err := <-d.watcher.Errors: if err != nil { level.Error(d.logger).Log("msg", "Error watching file", "err", err) } } } } func (d *Discovery) writeTimestamp(filename string, timestamp float64) { d.lock.Lock() d.timestamps[filename] = timestamp d.lock.Unlock() } func (d *Discovery) deleteTimestamp(filename string) { d.lock.Lock() delete(d.timestamps, filename) d.lock.Unlock() } // stop shuts down the file watcher. func (d *Discovery) stop() { level.Debug(d.logger).Log("msg", "Stopping file discovery...", "paths", fmt.Sprintf("%v", d.paths)) done := make(chan struct{}) defer close(done) fileSDTimeStamp.removeDiscoverer(d) // Closing the watcher will deadlock unless all events and errors are drained. go func() { for { select { case <-d.watcher.Errors: case <-d.watcher.Events: // Drain all events and errors. case <-done: return } } }() if err := d.watcher.Close(); err != nil { level.Error(d.logger).Log("msg", "Error closing file watcher", "paths", fmt.Sprintf("%v", d.paths), "err", err) } level.Debug(d.logger).Log("msg", "File discovery stopped") } // refresh reads all files matching the discovery's patterns and sends the respective // updated target groups through the channel. func (d *Discovery) refresh(ctx context.Context, ch chan<- []*targetgroup.Group) { t0 := time.Now() defer func() { fileSDScanDuration.Observe(time.Since(t0).Seconds()) }() ref := map[string]int{} for _, p := range d.listFiles() { tgroups, err := d.readFile(p) if err != nil { fileSDReadErrorsCount.Inc() level.Error(d.logger).Log("msg", "Error reading file", "path", p, "err", err) // Prevent deletion down below. ref[p] = d.lastRefresh[p] continue } select { case ch <- tgroups: case <-ctx.Done(): return } ref[p] = len(tgroups) } // Send empty updates for sources that disappeared. for f, n := range d.lastRefresh { m, ok := ref[f] if !ok || n > m { level.Debug(d.logger).Log("msg", "file_sd refresh found file that should be removed", "file", f) d.deleteTimestamp(f) for i := m; i < n; i++ { select { case ch <- []*targetgroup.Group{{Source: fileSource(f, i)}}: case <-ctx.Done(): return } } } } d.lastRefresh = ref d.watchFiles() } // readFile reads a JSON or YAML list of targets groups from the file, depending on its // file extension. It returns full configuration target groups. 
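// The YAML form it accepts is the one used by the valid.yml test fixture:
//
//	- targets: ['localhost:9090', 'example.org:443']
//	  labels:
//	    foo: bar
//	- targets: ['my.domain']
//
// The JSON form (valid.json) is the equivalent list of objects with "targets"
// and optional "labels" keys.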
func (d *Discovery) readFile(filename string) ([]*targetgroup.Group, error) { fd, err := os.Open(filename) if err != nil { return nil, err } defer fd.Close() content, err := ioutil.ReadAll(fd) if err != nil { return nil, err } info, err := fd.Stat() if err != nil { return nil, err } var targetGroups []*targetgroup.Group switch ext := filepath.Ext(filename); strings.ToLower(ext) { case ".json": if err := json.Unmarshal(content, &targetGroups); err != nil { return nil, err } case ".yml", ".yaml": if err := yaml.UnmarshalStrict(content, &targetGroups); err != nil { return nil, err } default: panic(errors.Errorf("discovery.File.readFile: unhandled file extension %q", ext)) } for i, tg := range targetGroups { if tg == nil { err = errors.New("nil target group item found") return nil, err } tg.Source = fileSource(filename, i) if tg.Labels == nil { tg.Labels = model.LabelSet{} } tg.Labels[fileSDFilepathLabel] = model.LabelValue(filename) } d.writeTimestamp(filename, float64(info.ModTime().Unix())) return targetGroups, nil } // fileSource returns a source ID for the i-th target group in the file. func fileSource(filename string, i int) string { return fmt.Sprintf("%s:%d", filename, i) } prometheus-2.15.2+ds/discovery/file/file_test.go000066400000000000000000000255301360540074000216350ustar00rootroot00000000000000// Copyright 2016 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package file import ( "context" "encoding/json" "io" "io/ioutil" "os" "path/filepath" "sort" "sync" "testing" "time" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/util/testutil" ) const defaultWait = time.Second type testRunner struct { *testing.T dir string ch chan []*targetgroup.Group done, stopped chan struct{} cancelSD context.CancelFunc mtx sync.Mutex tgs map[string]*targetgroup.Group receivedAt time.Time } func newTestRunner(t *testing.T) *testRunner { t.Helper() tmpDir, err := ioutil.TempDir("", "prometheus-file-sd") testutil.Ok(t, err) return &testRunner{ T: t, dir: tmpDir, ch: make(chan []*targetgroup.Group), done: make(chan struct{}), stopped: make(chan struct{}), tgs: make(map[string]*targetgroup.Group), } } // copyFile atomically copies a file to the runner's directory. func (t *testRunner) copyFile(src string) string { t.Helper() return t.copyFileTo(src, filepath.Base(src)) } // copyFileTo atomically copies a file with a different name to the runner's directory. func (t *testRunner) copyFileTo(src string, name string) string { t.Helper() newf, err := ioutil.TempFile(t.dir, "") testutil.Ok(t, err) f, err := os.Open(src) testutil.Ok(t, err) _, err = io.Copy(newf, f) testutil.Ok(t, err) testutil.Ok(t, f.Close()) dst := filepath.Join(t.dir, name) err = os.Rename(newf.Name(), dst) testutil.Ok(t, err) return dst } // writeString writes atomically a string to a file. 
func (t *testRunner) writeString(file string, data string) { t.Helper() newf, err := ioutil.TempFile(t.dir, "") testutil.Ok(t, err) _, err = newf.WriteString(data) testutil.Ok(t, err) testutil.Ok(t, newf.Close()) err = os.Rename(newf.Name(), file) testutil.Ok(t, err) } // appendString appends a string to a file. func (t *testRunner) appendString(file, data string) { t.Helper() f, err := os.OpenFile(file, os.O_WRONLY|os.O_APPEND, 0) testutil.Ok(t, err) defer f.Close() _, err = f.WriteString(data) testutil.Ok(t, err) } // run starts the file SD and the loop receiving target groups updates. func (t *testRunner) run(files ...string) { go func() { defer close(t.stopped) for { select { case <-t.done: os.RemoveAll(t.dir) return case tgs := <-t.ch: t.mtx.Lock() t.receivedAt = time.Now() for _, tg := range tgs { t.tgs[tg.Source] = tg } t.mtx.Unlock() } } }() for i := range files { files[i] = filepath.Join(t.dir, files[i]) } ctx, cancel := context.WithCancel(context.Background()) t.cancelSD = cancel go func() { NewDiscovery( &SDConfig{ Files: files, // Setting a high refresh interval to make sure that the tests only // rely on file watches. RefreshInterval: model.Duration(1 * time.Hour), }, nil, ).Run(ctx, t.ch) }() } func (t *testRunner) stop() { t.cancelSD() close(t.done) <-t.stopped } func (t *testRunner) lastReceive() time.Time { t.mtx.Lock() defer t.mtx.Unlock() return t.receivedAt } func (t *testRunner) targets() []*targetgroup.Group { t.mtx.Lock() defer t.mtx.Unlock() var ( keys []string tgs []*targetgroup.Group ) for k := range t.tgs { keys = append(keys, k) } sort.Strings(keys) for _, k := range keys { tgs = append(tgs, t.tgs[k]) } return tgs } func (t *testRunner) requireUpdate(ref time.Time, expected []*targetgroup.Group) { t.Helper() for { select { case <-time.After(defaultWait): t.Fatalf("Expected update but got none") return case <-time.After(defaultWait / 10): if ref.Equal(t.lastReceive()) { // No update received. break } // We can receive partial updates so only check the result when the // expected number of groups is reached. tgs := t.targets() if len(tgs) != len(expected) { t.Logf("skipping update: expected %d targets, got %d", len(expected), len(tgs)) break } t.requireTargetGroups(expected, tgs) if ref.After(time.Time{}) { t.Logf("update received after %v", t.lastReceive().Sub(ref)) } return } } } func (t *testRunner) requireTargetGroups(expected, got []*targetgroup.Group) { t.Helper() b1, err := json.Marshal(expected) if err != nil { panic(err) } b2, err := json.Marshal(got) if err != nil { panic(err) } testutil.Equals(t, string(b1), string(b2)) } // validTg() maps to fixtures/valid.{json,yml}. func validTg(file string) []*targetgroup.Group { return []*targetgroup.Group{ &targetgroup.Group{ Targets: []model.LabelSet{ { model.AddressLabel: model.LabelValue("localhost:9090"), }, { model.AddressLabel: model.LabelValue("example.org:443"), }, }, Labels: model.LabelSet{ model.LabelName("foo"): model.LabelValue("bar"), fileSDFilepathLabel: model.LabelValue(file), }, Source: fileSource(file, 0), }, &targetgroup.Group{ Targets: []model.LabelSet{ { model.AddressLabel: model.LabelValue("my.domain"), }, }, Labels: model.LabelSet{ fileSDFilepathLabel: model.LabelValue(file), }, Source: fileSource(file, 1), }, } } // valid2Tg() maps to fixtures/valid2.{json,yml}. 
func valid2Tg(file string) []*targetgroup.Group { return []*targetgroup.Group{ &targetgroup.Group{ Targets: []model.LabelSet{ { model.AddressLabel: model.LabelValue("my.domain"), }, }, Labels: model.LabelSet{ fileSDFilepathLabel: model.LabelValue(file), }, Source: fileSource(file, 0), }, &targetgroup.Group{ Targets: []model.LabelSet{ { model.AddressLabel: model.LabelValue("localhost:9090"), }, }, Labels: model.LabelSet{ model.LabelName("foo"): model.LabelValue("bar"), model.LabelName("fred"): model.LabelValue("baz"), fileSDFilepathLabel: model.LabelValue(file), }, Source: fileSource(file, 1), }, &targetgroup.Group{ Targets: []model.LabelSet{ { model.AddressLabel: model.LabelValue("example.org:443"), }, }, Labels: model.LabelSet{ model.LabelName("scheme"): model.LabelValue("https"), fileSDFilepathLabel: model.LabelValue(file), }, Source: fileSource(file, 2), }, } } func TestInitialUpdate(t *testing.T) { for _, tc := range []string{ "fixtures/valid.yml", "fixtures/valid.json", } { t.Run(tc, func(t *testing.T) { t.Parallel() runner := newTestRunner(t) sdFile := runner.copyFile(tc) runner.run("*" + filepath.Ext(tc)) defer runner.stop() // Verify that we receive the initial target groups. runner.requireUpdate(time.Time{}, validTg(sdFile)) }) } } func TestInvalidFile(t *testing.T) { for _, tc := range []string{ "fixtures/invalid_nil.yml", "fixtures/invalid_nil.json", } { tc := tc t.Run(tc, func(t *testing.T) { t.Parallel() now := time.Now() runner := newTestRunner(t) runner.copyFile(tc) runner.run("*" + filepath.Ext(tc)) defer runner.stop() // Verify that we've received nothing. time.Sleep(defaultWait) if runner.lastReceive().After(now) { t.Fatalf("unexpected targets received: %v", runner.targets()) } }) } } func TestNoopFileUpdate(t *testing.T) { t.Parallel() runner := newTestRunner(t) sdFile := runner.copyFile("fixtures/valid.yml") runner.run("*.yml") defer runner.stop() // Verify that we receive the initial target groups. runner.requireUpdate(time.Time{}, validTg(sdFile)) // Verify that we receive an update with the same target groups. ref := runner.lastReceive() runner.copyFileTo("fixtures/valid3.yml", "valid.yml") runner.requireUpdate(ref, validTg(sdFile)) } func TestFileUpdate(t *testing.T) { t.Parallel() runner := newTestRunner(t) sdFile := runner.copyFile("fixtures/valid.yml") runner.run("*.yml") defer runner.stop() // Verify that we receive the initial target groups. runner.requireUpdate(time.Time{}, validTg(sdFile)) // Verify that we receive an update with the new target groups. ref := runner.lastReceive() runner.copyFileTo("fixtures/valid2.yml", "valid.yml") runner.requireUpdate(ref, valid2Tg(sdFile)) } func TestInvalidFileUpdate(t *testing.T) { t.Parallel() runner := newTestRunner(t) sdFile := runner.copyFile("fixtures/valid.yml") runner.run("*.yml") defer runner.stop() // Verify that we receive the initial target groups. runner.requireUpdate(time.Time{}, validTg(sdFile)) ref := runner.lastReceive() runner.writeString(sdFile, "]gibberish\n][") // Verify that we receive nothing or the same targets as before. time.Sleep(defaultWait) if runner.lastReceive().After(ref) { runner.requireTargetGroups(validTg(sdFile), runner.targets()) } } func TestUpdateFileWithPartialWrites(t *testing.T) { t.Parallel() runner := newTestRunner(t) sdFile := runner.copyFile("fixtures/valid.yml") runner.run("*.yml") defer runner.stop() // Verify that we receive the initial target groups. runner.requireUpdate(time.Time{}, validTg(sdFile)) // Do a partial write operation. 
ref := runner.lastReceive() runner.writeString(sdFile, "- targets") time.Sleep(defaultWait) // Verify that we receive nothing or the same target groups as before. if runner.lastReceive().After(ref) { runner.requireTargetGroups(validTg(sdFile), runner.targets()) } // Verify that we receive the update target groups once the file is a valid YAML payload. ref = runner.lastReceive() runner.appendString(sdFile, `: ["localhost:9091"]`) runner.requireUpdate(ref, []*targetgroup.Group{ &targetgroup.Group{ Targets: []model.LabelSet{ { model.AddressLabel: model.LabelValue("localhost:9091"), }, }, Labels: model.LabelSet{ fileSDFilepathLabel: model.LabelValue(sdFile), }, Source: fileSource(sdFile, 0), }, &targetgroup.Group{ Source: fileSource(sdFile, 1), }, }, ) } func TestRemoveFile(t *testing.T) { t.Parallel() runner := newTestRunner(t) sdFile := runner.copyFile("fixtures/valid.yml") runner.run("*.yml") defer runner.stop() // Verify that we receive the initial target groups. runner.requireUpdate(time.Time{}, validTg(sdFile)) // Verify that we receive the update about the target groups being removed. ref := runner.lastReceive() testutil.Ok(t, os.Remove(sdFile)) runner.requireUpdate( ref, []*targetgroup.Group{ &targetgroup.Group{ Source: fileSource(sdFile, 0), }, &targetgroup.Group{ Source: fileSource(sdFile, 1), }}, ) } prometheus-2.15.2+ds/discovery/file/fixtures/000077500000000000000000000000001360540074000211745ustar00rootroot00000000000000prometheus-2.15.2+ds/discovery/file/fixtures/invalid_nil.json000066400000000000000000000002131360540074000243530ustar00rootroot00000000000000[ { "targets": ["localhost:9090", "example.org:443"], "labels": { "foo": "bar" } }, null ] prometheus-2.15.2+ds/discovery/file/fixtures/invalid_nil.yml000066400000000000000000000001201360540074000242000ustar00rootroot00000000000000- targets: ['localhost:9090', 'example.org:443'] labels: foo: bar - null prometheus-2.15.2+ds/discovery/file/fixtures/valid.json000066400000000000000000000002021360540074000231600ustar00rootroot00000000000000[ { "targets": ["localhost:9090", "example.org:443"], "labels": { "foo": "bar" } }, { "targets": ["my.domain"] } ] prometheus-2.15.2+ds/discovery/file/fixtures/valid.yml000066400000000000000000000001421360540074000230130ustar00rootroot00000000000000- targets: ['localhost:9090', 'example.org:443'] labels: foo: bar - targets: ['my.domain'] prometheus-2.15.2+ds/discovery/file/fixtures/valid2.yml000066400000000000000000000002271360540074000231010ustar00rootroot00000000000000- targets: ['my.domain'] - targets: ['localhost:9090'] labels: foo: bar fred: baz - targets: ['example.org:443'] labels: scheme: https prometheus-2.15.2+ds/discovery/file/fixtures/valid3.yml000066400000000000000000000002611360540074000231000ustar00rootroot00000000000000# the YAML structure is identical to valid.yml but the raw data is different. - targets: ['localhost:9090', 'example.org:443'] labels: foo: bar - targets: ['my.domain'] prometheus-2.15.2+ds/discovery/gce/000077500000000000000000000000001360540074000171425ustar00rootroot00000000000000prometheus-2.15.2+ds/discovery/gce/gce.go000066400000000000000000000154741360540074000202420ustar00rootroot00000000000000// Copyright 2015 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package gce import ( "context" "fmt" "net/http" "strconv" "strings" "time" "github.com/go-kit/kit/log" "github.com/pkg/errors" "github.com/prometheus/common/model" "golang.org/x/oauth2/google" compute "google.golang.org/api/compute/v1" "google.golang.org/api/option" "github.com/prometheus/prometheus/discovery/refresh" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/util/strutil" ) const ( gceLabel = model.MetaLabelPrefix + "gce_" gceLabelProject = gceLabel + "project" gceLabelZone = gceLabel + "zone" gceLabelNetwork = gceLabel + "network" gceLabelSubnetwork = gceLabel + "subnetwork" gceLabelPublicIP = gceLabel + "public_ip" gceLabelPrivateIP = gceLabel + "private_ip" gceLabelInstanceID = gceLabel + "instance_id" gceLabelInstanceName = gceLabel + "instance_name" gceLabelInstanceStatus = gceLabel + "instance_status" gceLabelTags = gceLabel + "tags" gceLabelMetadata = gceLabel + "metadata_" gceLabelLabel = gceLabel + "label_" gceLabelMachineType = gceLabel + "machine_type" ) // DefaultSDConfig is the default GCE SD configuration. var DefaultSDConfig = SDConfig{ Port: 80, TagSeparator: ",", RefreshInterval: model.Duration(60 * time.Second), } // SDConfig is the configuration for GCE based service discovery. type SDConfig struct { // Project: The Google Cloud Project ID Project string `yaml:"project"` // Zone: The zone of the scrape targets. // If you need to configure multiple zones use multiple gce_sd_configs Zone string `yaml:"zone"` // Filter: Can be used optionally to filter the instance list by other criteria. // Syntax of this filter string is described here in the filter query parameter section: // https://cloud.google.com/compute/docs/reference/latest/instances/list Filter string `yaml:"filter,omitempty"` RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` Port int `yaml:"port"` TagSeparator string `yaml:"tag_separator,omitempty"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = DefaultSDConfig type plain SDConfig err := unmarshal((*plain)(c)) if err != nil { return err } if c.Project == "" { return errors.New("GCE SD configuration requires a project") } if c.Zone == "" { return errors.New("GCE SD configuration requires a zone") } return nil } // Discovery periodically performs GCE-SD requests. It implements // the Discoverer interface. type Discovery struct { *refresh.Discovery project string zone string filter string client *http.Client svc *compute.Service isvc *compute.InstancesService port int tagSeparator string } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
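// It authenticates via Google Application Default Credentials with the compute.readonly // scope and lists the instances of the configured project and zone on every refresh interval. 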
func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) { d := &Discovery{ project: conf.Project, zone: conf.Zone, filter: conf.Filter, port: conf.Port, tagSeparator: conf.TagSeparator, } var err error d.client, err = google.DefaultClient(context.Background(), compute.ComputeReadonlyScope) if err != nil { return nil, errors.Wrap(err, "error setting up communication with GCE service") } d.svc, err = compute.NewService(context.Background(), option.WithHTTPClient(d.client)) if err != nil { return nil, errors.Wrap(err, "error setting up communication with GCE service") } d.isvc = compute.NewInstancesService(d.svc) d.Discovery = refresh.NewDiscovery( logger, "gce", time.Duration(conf.RefreshInterval), d.refresh, ) return d, nil } func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { tg := &targetgroup.Group{ Source: fmt.Sprintf("GCE_%s_%s", d.project, d.zone), } ilc := d.isvc.List(d.project, d.zone) if len(d.filter) > 0 { ilc = ilc.Filter(d.filter) } err := ilc.Pages(ctx, func(l *compute.InstanceList) error { for _, inst := range l.Items { if len(inst.NetworkInterfaces) == 0 { continue } labels := model.LabelSet{ gceLabelProject: model.LabelValue(d.project), gceLabelZone: model.LabelValue(inst.Zone), gceLabelInstanceID: model.LabelValue(strconv.FormatUint(inst.Id, 10)), gceLabelInstanceName: model.LabelValue(inst.Name), gceLabelInstanceStatus: model.LabelValue(inst.Status), gceLabelMachineType: model.LabelValue(inst.MachineType), } priIface := inst.NetworkInterfaces[0] labels[gceLabelNetwork] = model.LabelValue(priIface.Network) labels[gceLabelSubnetwork] = model.LabelValue(priIface.Subnetwork) labels[gceLabelPrivateIP] = model.LabelValue(priIface.NetworkIP) addr := fmt.Sprintf("%s:%d", priIface.NetworkIP, d.port) labels[model.AddressLabel] = model.LabelValue(addr) // Tags in GCE are usually only used for networking rules. if inst.Tags != nil && len(inst.Tags.Items) > 0 { // We surround the separated list with the separator as well. This way regular expressions // in relabeling rules don't have to consider tag positions. tags := d.tagSeparator + strings.Join(inst.Tags.Items, d.tagSeparator) + d.tagSeparator labels[gceLabelTags] = model.LabelValue(tags) } // GCE metadata are key-value pairs for user supplied attributes. if inst.Metadata != nil { for _, i := range inst.Metadata.Items { // Protect against occasional nil pointers. 
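// The compute API exposes metadata values as *string, so a key may be present with a nil // value; such entries are skipped instead of being dereferenced below. 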
if i.Value == nil { continue } name := strutil.SanitizeLabelName(i.Key) labels[gceLabelMetadata+model.LabelName(name)] = model.LabelValue(*i.Value) } } // GCE labels are key-value pairs that group associated resources for key, value := range inst.Labels { name := strutil.SanitizeLabelName(key) labels[gceLabelLabel+model.LabelName(name)] = model.LabelValue(value) } if len(priIface.AccessConfigs) > 0 { ac := priIface.AccessConfigs[0] if ac.Type == "ONE_TO_ONE_NAT" { labels[gceLabelPublicIP] = model.LabelValue(ac.NatIP) } } tg.Targets = append(tg.Targets, labels) } return nil }) if err != nil { return nil, errors.Wrap(err, "error retrieving refresh targets from gce") } return []*targetgroup.Group{tg}, nil } prometheus-2.15.2+ds/discovery/kubernetes/000077500000000000000000000000001360540074000205535ustar00rootroot00000000000000prometheus-2.15.2+ds/discovery/kubernetes/client_metrics.go000066400000000000000000000153731360540074000241170ustar00rootroot00000000000000// Copyright 2018 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package kubernetes import ( "net/url" "time" "github.com/prometheus/client_golang/prometheus" "k8s.io/client-go/tools/metrics" "k8s.io/client-go/util/workqueue" ) const workqueueMetricsNamespace = metricsNamespace + "_workqueue" var ( // Metrics for client-go's HTTP requests. 
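// The request counter below is keyed by HTTP status code and the latency summary by endpoint // path; both are registered with the default registerer via clientGoRequestMetricAdapter.Register. 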
clientGoRequestResultMetricVec = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: metricsNamespace, Name: "http_request_total", Help: "Total number of HTTP requests to the Kubernetes API by status code.", }, []string{"status_code"}, ) clientGoRequestLatencyMetricVec = prometheus.NewSummaryVec( prometheus.SummaryOpts{ Namespace: metricsNamespace, Name: "http_request_duration_seconds", Help: "Summary of latencies for HTTP requests to the Kubernetes API by endpoint.", Objectives: map[float64]float64{}, }, []string{"endpoint"}, ) // Definition of metrics for client-go workflow metrics provider clientGoWorkqueueDepthMetricVec = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: workqueueMetricsNamespace, Name: "depth", Help: "Current depth of the work queue.", }, []string{"queue_name"}, ) clientGoWorkqueueAddsMetricVec = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: workqueueMetricsNamespace, Name: "items_total", Help: "Total number of items added to the work queue.", }, []string{"queue_name"}, ) clientGoWorkqueueLatencyMetricVec = prometheus.NewSummaryVec( prometheus.SummaryOpts{ Namespace: workqueueMetricsNamespace, Name: "latency_seconds", Help: "How long an item stays in the work queue.", Objectives: map[float64]float64{}, }, []string{"queue_name"}, ) clientGoWorkqueueUnfinishedWorkSecondsMetricVec = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: workqueueMetricsNamespace, Name: "unfinished_work_seconds", Help: "How long an item has remained unfinished in the work queue.", }, []string{"queue_name"}, ) clientGoWorkqueueLongestRunningProcessorMetricVec = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: workqueueMetricsNamespace, Name: "longest_running_processor_seconds", Help: "Duration of the longest running processor in the work queue.", }, []string{"queue_name"}, ) clientGoWorkqueueWorkDurationMetricVec = prometheus.NewSummaryVec( prometheus.SummaryOpts{ Namespace: workqueueMetricsNamespace, Name: "work_duration_seconds", Help: "How long processing an item from the work queue takes.", Objectives: map[float64]float64{}, }, []string{"queue_name"}, ) ) // Definition of dummy metric used as a placeholder if we don't want to observe some data. 
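// noopMetric provides the Inc/Dec/Observe/Set methods client-go expects, so the deprecated // workqueue metric constructors can hand back a harmless no-op. 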
type noopMetric struct{} func (noopMetric) Inc() {} func (noopMetric) Dec() {} func (noopMetric) Observe(float64) {} func (noopMetric) Set(float64) {} // Definition of client-go metrics adapters for HTTP requests observation type clientGoRequestMetricAdapter struct{} func (f *clientGoRequestMetricAdapter) Register(registerer prometheus.Registerer) { metrics.Register(f, f) registerer.MustRegister( clientGoRequestResultMetricVec, clientGoRequestLatencyMetricVec, ) } func (clientGoRequestMetricAdapter) Increment(code string, method string, host string) { clientGoRequestResultMetricVec.WithLabelValues(code).Inc() } func (clientGoRequestMetricAdapter) Observe(verb string, u url.URL, latency time.Duration) { clientGoRequestLatencyMetricVec.WithLabelValues(u.EscapedPath()).Observe(latency.Seconds()) } // Definition of client-go workqueue metrics provider definition type clientGoWorkqueueMetricsProvider struct{} func (f *clientGoWorkqueueMetricsProvider) Register(registerer prometheus.Registerer) { workqueue.SetProvider(f) registerer.MustRegister( clientGoWorkqueueDepthMetricVec, clientGoWorkqueueAddsMetricVec, clientGoWorkqueueLatencyMetricVec, clientGoWorkqueueWorkDurationMetricVec, clientGoWorkqueueUnfinishedWorkSecondsMetricVec, clientGoWorkqueueLongestRunningProcessorMetricVec, ) } func (f *clientGoWorkqueueMetricsProvider) NewDepthMetric(name string) workqueue.GaugeMetric { return clientGoWorkqueueDepthMetricVec.WithLabelValues(name) } func (f *clientGoWorkqueueMetricsProvider) NewAddsMetric(name string) workqueue.CounterMetric { return clientGoWorkqueueAddsMetricVec.WithLabelValues(name) } func (f *clientGoWorkqueueMetricsProvider) NewLatencyMetric(name string) workqueue.HistogramMetric { return clientGoWorkqueueLatencyMetricVec.WithLabelValues(name) } func (f *clientGoWorkqueueMetricsProvider) NewWorkDurationMetric(name string) workqueue.HistogramMetric { return clientGoWorkqueueWorkDurationMetricVec.WithLabelValues(name) } func (f *clientGoWorkqueueMetricsProvider) NewUnfinishedWorkSecondsMetric(name string) workqueue.SettableGaugeMetric { return clientGoWorkqueueUnfinishedWorkSecondsMetricVec.WithLabelValues(name) } func (f *clientGoWorkqueueMetricsProvider) NewLongestRunningProcessorSecondsMetric(name string) workqueue.SettableGaugeMetric { return clientGoWorkqueueLongestRunningProcessorMetricVec.WithLabelValues(name) } func (clientGoWorkqueueMetricsProvider) NewRetriesMetric(name string) workqueue.CounterMetric { // Retries are not used so the metric is omitted. 
return noopMetric{} } func (clientGoWorkqueueMetricsProvider) NewDeprecatedDepthMetric(name string) workqueue.GaugeMetric { return noopMetric{} } func (clientGoWorkqueueMetricsProvider) NewDeprecatedAddsMetric(name string) workqueue.CounterMetric { return noopMetric{} } func (clientGoWorkqueueMetricsProvider) NewDeprecatedLatencyMetric(name string) workqueue.SummaryMetric { return noopMetric{} } func (f *clientGoWorkqueueMetricsProvider) NewDeprecatedWorkDurationMetric(name string) workqueue.SummaryMetric { return noopMetric{} } func (f *clientGoWorkqueueMetricsProvider) NewDeprecatedUnfinishedWorkSecondsMetric(name string) workqueue.SettableGaugeMetric { return noopMetric{} } func (f *clientGoWorkqueueMetricsProvider) NewDeprecatedLongestRunningProcessorMicrosecondsMetric(name string) workqueue.SettableGaugeMetric { return noopMetric{} } func (clientGoWorkqueueMetricsProvider) NewDeprecatedRetriesMetric(name string) workqueue.CounterMetric { return noopMetric{} } prometheus-2.15.2+ds/discovery/kubernetes/endpoints.go000066400000000000000000000240721360540074000231120ustar00rootroot00000000000000// Copyright 2016 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package kubernetes import ( "context" "net" "strconv" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" "github.com/pkg/errors" "github.com/prometheus/common/model" apiv1 "k8s.io/api/core/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" "github.com/prometheus/prometheus/discovery/targetgroup" ) // Endpoints discovers new endpoint targets. type Endpoints struct { logger log.Logger endpointsInf cache.SharedInformer serviceInf cache.SharedInformer podInf cache.SharedInformer podStore cache.Store endpointsStore cache.Store serviceStore cache.Store queue *workqueue.Type } // NewEndpoints returns a new endpoints discovery. 
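// It consumes the service, endpoints and pod shared informers; endpoints and service events // re-enqueue the affected Endpoints object, while the pod store is only used to resolve target references. 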
func NewEndpoints(l log.Logger, svc, eps, pod cache.SharedInformer) *Endpoints { if l == nil { l = log.NewNopLogger() } e := &Endpoints{ logger: l, endpointsInf: eps, endpointsStore: eps.GetStore(), serviceInf: svc, serviceStore: svc.GetStore(), podInf: pod, podStore: pod.GetStore(), queue: workqueue.NewNamed("endpoints"), } e.endpointsInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(o interface{}) { eventCount.WithLabelValues("endpoints", "add").Inc() e.enqueue(o) }, UpdateFunc: func(_, o interface{}) { eventCount.WithLabelValues("endpoints", "update").Inc() e.enqueue(o) }, DeleteFunc: func(o interface{}) { eventCount.WithLabelValues("endpoints", "delete").Inc() e.enqueue(o) }, }) serviceUpdate := func(o interface{}) { svc, err := convertToService(o) if err != nil { level.Error(e.logger).Log("msg", "converting to Service object failed", "err", err) return } ep := &apiv1.Endpoints{} ep.Namespace = svc.Namespace ep.Name = svc.Name obj, exists, err := e.endpointsStore.Get(ep) if exists && err == nil { e.enqueue(obj.(*apiv1.Endpoints)) } if err != nil { level.Error(e.logger).Log("msg", "retrieving endpoints failed", "err", err) } } e.serviceInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ // TODO(fabxc): potentially remove add and delete event handlers. Those should // be triggered via the endpoint handlers already. AddFunc: func(o interface{}) { eventCount.WithLabelValues("service", "add").Inc() serviceUpdate(o) }, UpdateFunc: func(_, o interface{}) { eventCount.WithLabelValues("service", "update").Inc() serviceUpdate(o) }, DeleteFunc: func(o interface{}) { eventCount.WithLabelValues("service", "delete").Inc() serviceUpdate(o) }, }) return e } func (e *Endpoints) enqueue(obj interface{}) { key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) if err != nil { return } e.queue.Add(key) } // Run implements the Discoverer interface. func (e *Endpoints) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { defer e.queue.ShutDown() if !cache.WaitForCacheSync(ctx.Done(), e.endpointsInf.HasSynced, e.serviceInf.HasSynced, e.podInf.HasSynced) { if ctx.Err() != context.Canceled { level.Error(e.logger).Log("msg", "endpoints informer unable to sync cache") } return } go func() { for e.process(ctx, ch) { } }() // Block until the target provider is explicitly canceled. 
<-ctx.Done() } func (e *Endpoints) process(ctx context.Context, ch chan<- []*targetgroup.Group) bool { keyObj, quit := e.queue.Get() if quit { return false } defer e.queue.Done(keyObj) key := keyObj.(string) namespace, name, err := cache.SplitMetaNamespaceKey(key) if err != nil { level.Error(e.logger).Log("msg", "splitting key failed", "key", key) return true } o, exists, err := e.endpointsStore.GetByKey(key) if err != nil { level.Error(e.logger).Log("msg", "getting object from store failed", "key", key) return true } if !exists { send(ctx, e.logger, RoleEndpoint, ch, &targetgroup.Group{Source: endpointsSourceFromNamespaceAndName(namespace, name)}) return true } eps, err := convertToEndpoints(o) if err != nil { level.Error(e.logger).Log("msg", "converting to Endpoints object failed", "err", err) return true } send(ctx, e.logger, RoleEndpoint, ch, e.buildEndpoints(eps)) return true } func convertToEndpoints(o interface{}) (*apiv1.Endpoints, error) { endpoints, ok := o.(*apiv1.Endpoints) if ok { return endpoints, nil } return nil, errors.Errorf("received unexpected object: %v", o) } func endpointsSource(ep *apiv1.Endpoints) string { return endpointsSourceFromNamespaceAndName(ep.Namespace, ep.Name) } func endpointsSourceFromNamespaceAndName(namespace, name string) string { return "endpoints/" + namespace + "/" + name } const ( endpointsNameLabel = metaLabelPrefix + "endpoints_name" endpointNodeName = metaLabelPrefix + "endpoint_node_name" endpointHostname = metaLabelPrefix + "endpoint_hostname" endpointReadyLabel = metaLabelPrefix + "endpoint_ready" endpointPortNameLabel = metaLabelPrefix + "endpoint_port_name" endpointPortProtocolLabel = metaLabelPrefix + "endpoint_port_protocol" endpointAddressTargetKindLabel = metaLabelPrefix + "endpoint_address_target_kind" endpointAddressTargetNameLabel = metaLabelPrefix + "endpoint_address_target_name" ) func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group { tg := &targetgroup.Group{ Source: endpointsSource(eps), } tg.Labels = model.LabelSet{ namespaceLabel: lv(eps.Namespace), endpointsNameLabel: lv(eps.Name), } e.addServiceLabels(eps.Namespace, eps.Name, tg) type podEntry struct { pod *apiv1.Pod servicePorts []apiv1.EndpointPort } seenPods := map[string]*podEntry{} add := func(addr apiv1.EndpointAddress, port apiv1.EndpointPort, ready string) { a := net.JoinHostPort(addr.IP, strconv.FormatUint(uint64(port.Port), 10)) target := model.LabelSet{ model.AddressLabel: lv(a), endpointPortNameLabel: lv(port.Name), endpointPortProtocolLabel: lv(string(port.Protocol)), endpointReadyLabel: lv(ready), } if addr.TargetRef != nil { target[model.LabelName(endpointAddressTargetKindLabel)] = lv(addr.TargetRef.Kind) target[model.LabelName(endpointAddressTargetNameLabel)] = lv(addr.TargetRef.Name) } if addr.NodeName != nil { target[model.LabelName(endpointNodeName)] = lv(*addr.NodeName) } if addr.Hostname != "" { target[model.LabelName(endpointHostname)] = lv(addr.Hostname) } pod := e.resolvePodRef(addr.TargetRef) if pod == nil { // This target is not a Pod, so don't continue with Pod specific logic. tg.Targets = append(tg.Targets, target) return } s := pod.Namespace + "/" + pod.Name sp, ok := seenPods[s] if !ok { sp = &podEntry{pod: pod} seenPods[s] = sp } // Attach standard pod labels. target = target.Merge(podLabels(pod)) // Attach potential container port labels matching the endpoint port. 
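// The endpoint port number is compared against the container ports declared by the pod; a // match contributes the container name and port metadata to the target. 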
for _, c := range pod.Spec.Containers { for _, cport := range c.Ports { if port.Port == cport.ContainerPort { ports := strconv.FormatUint(uint64(port.Port), 10) target[podContainerNameLabel] = lv(c.Name) target[podContainerPortNameLabel] = lv(cport.Name) target[podContainerPortNumberLabel] = lv(ports) target[podContainerPortProtocolLabel] = lv(string(port.Protocol)) break } } } // Add service port so we know that we have already generated a target // for it. sp.servicePorts = append(sp.servicePorts, port) tg.Targets = append(tg.Targets, target) } for _, ss := range eps.Subsets { for _, port := range ss.Ports { for _, addr := range ss.Addresses { add(addr, port, "true") } // Although this generates the same target again, as it was generated in // the loop above, it causes the ready meta label to be overridden. for _, addr := range ss.NotReadyAddresses { add(addr, port, "false") } } } // For all seen pods, check all container ports. If they were not covered // by one of the service endpoints, generate targets for them. for _, pe := range seenPods { for _, c := range pe.pod.Spec.Containers { for _, cport := range c.Ports { hasSeenPort := func() bool { for _, eport := range pe.servicePorts { if cport.ContainerPort == eport.Port { return true } } return false } if hasSeenPort() { continue } a := net.JoinHostPort(pe.pod.Status.PodIP, strconv.FormatUint(uint64(cport.ContainerPort), 10)) ports := strconv.FormatUint(uint64(cport.ContainerPort), 10) target := model.LabelSet{ model.AddressLabel: lv(a), podContainerNameLabel: lv(c.Name), podContainerPortNameLabel: lv(cport.Name), podContainerPortNumberLabel: lv(ports), podContainerPortProtocolLabel: lv(string(cport.Protocol)), } tg.Targets = append(tg.Targets, target.Merge(podLabels(pe.pod))) } } } return tg } func (e *Endpoints) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { if ref == nil || ref.Kind != "Pod" { return nil } p := &apiv1.Pod{} p.Namespace = ref.Namespace p.Name = ref.Name obj, exists, err := e.podStore.Get(p) if err != nil { level.Error(e.logger).Log("msg", "resolving pod ref failed", "err", err) return nil } if !exists { return nil } return obj.(*apiv1.Pod) } func (e *Endpoints) addServiceLabels(ns, name string, tg *targetgroup.Group) { svc := &apiv1.Service{} svc.Namespace = ns svc.Name = name obj, exists, err := e.serviceStore.Get(svc) if err != nil { level.Error(e.logger).Log("msg", "retrieving service failed", "err", err) return } if !exists { return } svc = obj.(*apiv1.Service) tg.Labels = tg.Labels.Merge(serviceLabels(svc)) } prometheus-2.15.2+ds/discovery/kubernetes/endpoints_test.go000066400000000000000000000443421360540074000241530ustar00rootroot00000000000000// Copyright 2016 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package kubernetes import ( "testing" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/targetgroup" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" ) func makeEndpoints() *v1.Endpoints { var nodeName = "foobar" return &v1.Endpoints{ ObjectMeta: metav1.ObjectMeta{ Name: "testendpoints", Namespace: "default", }, Subsets: []v1.EndpointSubset{ { Addresses: []v1.EndpointAddress{ { IP: "1.2.3.4", Hostname: "testendpoint1", NodeName: &nodeName, }, }, Ports: []v1.EndpointPort{ { Name: "testport", Port: 9000, Protocol: v1.ProtocolTCP, }, }, }, { Addresses: []v1.EndpointAddress{ { IP: "2.3.4.5", }, }, NotReadyAddresses: []v1.EndpointAddress{ { IP: "2.3.4.5", }, }, Ports: []v1.EndpointPort{ { Name: "testport", Port: 9001, Protocol: v1.ProtocolTCP, }, }, }, }, } } func TestEndpointsDiscoveryBeforeRun(t *testing.T) { n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}) k8sDiscoveryTest{ discovery: n, beforeRun: func() { obj := makeEndpoints() c.CoreV1().Endpoints(obj.Namespace).Create(obj) }, expectedMaxItems: 1, expectedRes: map[string]*targetgroup.Group{ "endpoints/default/testendpoints": { Targets: []model.LabelSet{ { "__address__": "1.2.3.4:9000", "__meta_kubernetes_endpoint_hostname": "testendpoint1", "__meta_kubernetes_endpoint_node_name": "foobar", "__meta_kubernetes_endpoint_port_name": "testport", "__meta_kubernetes_endpoint_port_protocol": "TCP", "__meta_kubernetes_endpoint_ready": "true", }, { "__address__": "2.3.4.5:9001", "__meta_kubernetes_endpoint_port_name": "testport", "__meta_kubernetes_endpoint_port_protocol": "TCP", "__meta_kubernetes_endpoint_ready": "true", }, { "__address__": "2.3.4.5:9001", "__meta_kubernetes_endpoint_port_name": "testport", "__meta_kubernetes_endpoint_port_protocol": "TCP", "__meta_kubernetes_endpoint_ready": "false", }, }, Labels: model.LabelSet{ "__meta_kubernetes_namespace": "default", "__meta_kubernetes_endpoints_name": "testendpoints", }, Source: "endpoints/default/testendpoints", }, }, }.Run(t) } func TestEndpointsDiscoveryAdd(t *testing.T) { obj := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "testpod", Namespace: "default", UID: types.UID("deadbeef"), }, Spec: v1.PodSpec{ NodeName: "testnode", Containers: []v1.Container{ { Name: "c1", Ports: []v1.ContainerPort{ { Name: "mainport", ContainerPort: 9000, Protocol: v1.ProtocolTCP, }, }, }, { Name: "c2", Ports: []v1.ContainerPort{ { Name: "sideport", ContainerPort: 9001, Protocol: v1.ProtocolTCP, }, }, }, }, }, Status: v1.PodStatus{ HostIP: "2.3.4.5", PodIP: "1.2.3.4", }, } n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, obj) k8sDiscoveryTest{ discovery: n, afterStart: func() { obj := &v1.Endpoints{ ObjectMeta: metav1.ObjectMeta{ Name: "testendpoints", Namespace: "default", }, Subsets: []v1.EndpointSubset{ { Addresses: []v1.EndpointAddress{ { IP: "4.3.2.1", TargetRef: &v1.ObjectReference{ Kind: "Pod", Name: "testpod", Namespace: "default", }, }, }, Ports: []v1.EndpointPort{ { Name: "testport", Port: 9000, Protocol: v1.ProtocolTCP, }, }, }, }, } c.CoreV1().Endpoints(obj.Namespace).Create(obj) }, expectedMaxItems: 1, expectedRes: map[string]*targetgroup.Group{ "endpoints/default/testendpoints": { Targets: []model.LabelSet{ { "__address__": "4.3.2.1:9000", "__meta_kubernetes_endpoint_port_name": "testport", "__meta_kubernetes_endpoint_port_protocol": "TCP", "__meta_kubernetes_endpoint_ready": "true", "__meta_kubernetes_endpoint_address_target_kind": "Pod", 
"__meta_kubernetes_endpoint_address_target_name": "testpod", "__meta_kubernetes_pod_name": "testpod", "__meta_kubernetes_pod_ip": "1.2.3.4", "__meta_kubernetes_pod_ready": "unknown", "__meta_kubernetes_pod_phase": "", "__meta_kubernetes_pod_node_name": "testnode", "__meta_kubernetes_pod_host_ip": "2.3.4.5", "__meta_kubernetes_pod_container_name": "c1", "__meta_kubernetes_pod_container_port_name": "mainport", "__meta_kubernetes_pod_container_port_number": "9000", "__meta_kubernetes_pod_container_port_protocol": "TCP", "__meta_kubernetes_pod_uid": "deadbeef", }, { "__address__": "1.2.3.4:9001", "__meta_kubernetes_pod_name": "testpod", "__meta_kubernetes_pod_ip": "1.2.3.4", "__meta_kubernetes_pod_ready": "unknown", "__meta_kubernetes_pod_phase": "", "__meta_kubernetes_pod_node_name": "testnode", "__meta_kubernetes_pod_host_ip": "2.3.4.5", "__meta_kubernetes_pod_container_name": "c2", "__meta_kubernetes_pod_container_port_name": "sideport", "__meta_kubernetes_pod_container_port_number": "9001", "__meta_kubernetes_pod_container_port_protocol": "TCP", "__meta_kubernetes_pod_uid": "deadbeef", }, }, Labels: model.LabelSet{ "__meta_kubernetes_endpoints_name": "testendpoints", "__meta_kubernetes_namespace": "default", }, Source: "endpoints/default/testendpoints", }, }, }.Run(t) } func TestEndpointsDiscoveryDelete(t *testing.T) { n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints()) k8sDiscoveryTest{ discovery: n, afterStart: func() { obj := makeEndpoints() c.CoreV1().Endpoints(obj.Namespace).Delete(obj.Name, &metav1.DeleteOptions{}) }, expectedMaxItems: 2, expectedRes: map[string]*targetgroup.Group{ "endpoints/default/testendpoints": { Source: "endpoints/default/testendpoints", }, }, }.Run(t) } func TestEndpointsDiscoveryUpdate(t *testing.T) { n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints()) k8sDiscoveryTest{ discovery: n, afterStart: func() { obj := &v1.Endpoints{ ObjectMeta: metav1.ObjectMeta{ Name: "testendpoints", Namespace: "default", }, Subsets: []v1.EndpointSubset{ { Addresses: []v1.EndpointAddress{ { IP: "1.2.3.4", }, }, Ports: []v1.EndpointPort{ { Name: "testport", Port: 9000, Protocol: v1.ProtocolTCP, }, }, }, { Addresses: []v1.EndpointAddress{ { IP: "2.3.4.5", }, }, Ports: []v1.EndpointPort{ { Name: "testport", Port: 9001, Protocol: v1.ProtocolTCP, }, }, }, }, } c.CoreV1().Endpoints(obj.Namespace).Update(obj) }, expectedMaxItems: 2, expectedRes: map[string]*targetgroup.Group{ "endpoints/default/testendpoints": { Targets: []model.LabelSet{ { "__address__": "1.2.3.4:9000", "__meta_kubernetes_endpoint_port_name": "testport", "__meta_kubernetes_endpoint_port_protocol": "TCP", "__meta_kubernetes_endpoint_ready": "true", }, { "__address__": "2.3.4.5:9001", "__meta_kubernetes_endpoint_port_name": "testport", "__meta_kubernetes_endpoint_port_protocol": "TCP", "__meta_kubernetes_endpoint_ready": "true", }, }, Labels: model.LabelSet{ "__meta_kubernetes_namespace": "default", "__meta_kubernetes_endpoints_name": "testendpoints", }, Source: "endpoints/default/testendpoints", }, }, }.Run(t) } func TestEndpointsDiscoveryEmptySubsets(t *testing.T) { n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints()) k8sDiscoveryTest{ discovery: n, afterStart: func() { obj := &v1.Endpoints{ ObjectMeta: metav1.ObjectMeta{ Name: "testendpoints", Namespace: "default", }, Subsets: []v1.EndpointSubset{}, } c.CoreV1().Endpoints(obj.Namespace).Update(obj) }, expectedMaxItems: 2, expectedRes: map[string]*targetgroup.Group{ "endpoints/default/testendpoints": { 
Labels: model.LabelSet{ "__meta_kubernetes_namespace": "default", "__meta_kubernetes_endpoints_name": "testendpoints", }, Source: "endpoints/default/testendpoints", }, }, }.Run(t) } func TestEndpointsDiscoveryWithService(t *testing.T) { n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints()) k8sDiscoveryTest{ discovery: n, beforeRun: func() { obj := &v1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: "testendpoints", Namespace: "default", Labels: map[string]string{ "app/name": "test", }, }, } c.CoreV1().Services(obj.Namespace).Create(obj) }, expectedMaxItems: 1, expectedRes: map[string]*targetgroup.Group{ "endpoints/default/testendpoints": { Targets: []model.LabelSet{ { "__address__": "1.2.3.4:9000", "__meta_kubernetes_endpoint_hostname": "testendpoint1", "__meta_kubernetes_endpoint_node_name": "foobar", "__meta_kubernetes_endpoint_port_name": "testport", "__meta_kubernetes_endpoint_port_protocol": "TCP", "__meta_kubernetes_endpoint_ready": "true", }, { "__address__": "2.3.4.5:9001", "__meta_kubernetes_endpoint_port_name": "testport", "__meta_kubernetes_endpoint_port_protocol": "TCP", "__meta_kubernetes_endpoint_ready": "true", }, { "__address__": "2.3.4.5:9001", "__meta_kubernetes_endpoint_port_name": "testport", "__meta_kubernetes_endpoint_port_protocol": "TCP", "__meta_kubernetes_endpoint_ready": "false", }, }, Labels: model.LabelSet{ "__meta_kubernetes_namespace": "default", "__meta_kubernetes_endpoints_name": "testendpoints", "__meta_kubernetes_service_label_app_name": "test", "__meta_kubernetes_service_labelpresent_app_name": "true", "__meta_kubernetes_service_name": "testendpoints", }, Source: "endpoints/default/testendpoints", }, }, }.Run(t) } func TestEndpointsDiscoveryWithServiceUpdate(t *testing.T) { n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints()) k8sDiscoveryTest{ discovery: n, beforeRun: func() { obj := &v1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: "testendpoints", Namespace: "default", Labels: map[string]string{ "app/name": "test", }, }, } c.CoreV1().Services(obj.Namespace).Create(obj) }, afterStart: func() { obj := &v1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: "testendpoints", Namespace: "default", Labels: map[string]string{ "app/name": "svc", "component": "testing", }, }, } c.CoreV1().Services(obj.Namespace).Update(obj) }, expectedMaxItems: 2, expectedRes: map[string]*targetgroup.Group{ "endpoints/default/testendpoints": { Targets: []model.LabelSet{ { "__address__": "1.2.3.4:9000", "__meta_kubernetes_endpoint_hostname": "testendpoint1", "__meta_kubernetes_endpoint_node_name": "foobar", "__meta_kubernetes_endpoint_port_name": "testport", "__meta_kubernetes_endpoint_port_protocol": "TCP", "__meta_kubernetes_endpoint_ready": "true", }, { "__address__": "2.3.4.5:9001", "__meta_kubernetes_endpoint_port_name": "testport", "__meta_kubernetes_endpoint_port_protocol": "TCP", "__meta_kubernetes_endpoint_ready": "true", }, { "__address__": "2.3.4.5:9001", "__meta_kubernetes_endpoint_port_name": "testport", "__meta_kubernetes_endpoint_port_protocol": "TCP", "__meta_kubernetes_endpoint_ready": "false", }, }, Labels: model.LabelSet{ "__meta_kubernetes_namespace": "default", "__meta_kubernetes_endpoints_name": "testendpoints", "__meta_kubernetes_service_label_app_name": "svc", "__meta_kubernetes_service_labelpresent_app_name": "true", "__meta_kubernetes_service_name": "testendpoints", "__meta_kubernetes_service_label_component": "testing", "__meta_kubernetes_service_labelpresent_component": "true", }, Source: 
"endpoints/default/testendpoints", }, }, }.Run(t) } func TestEndpointsDiscoveryNamespaces(t *testing.T) { epOne := makeEndpoints() epOne.Namespace = "ns1" objs := []runtime.Object{ epOne, &v1.Endpoints{ ObjectMeta: metav1.ObjectMeta{ Name: "testendpoints", Namespace: "ns2", }, Subsets: []v1.EndpointSubset{ { Addresses: []v1.EndpointAddress{ { IP: "4.3.2.1", TargetRef: &v1.ObjectReference{ Kind: "Pod", Name: "testpod", Namespace: "ns2", }, }, }, Ports: []v1.EndpointPort{ { Name: "testport", Port: 9000, Protocol: v1.ProtocolTCP, }, }, }, }, }, &v1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: "testendpoints", Namespace: "ns1", Labels: map[string]string{ "app": "app1", }, }, }, &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "testpod", Namespace: "ns2", UID: types.UID("deadbeef"), }, Spec: v1.PodSpec{ NodeName: "testnode", Containers: []v1.Container{ { Name: "c1", Ports: []v1.ContainerPort{ { Name: "mainport", ContainerPort: 9000, Protocol: v1.ProtocolTCP, }, }, }, }, }, Status: v1.PodStatus{ HostIP: "2.3.4.5", PodIP: "4.3.2.1", }, }, } n, _ := makeDiscovery(RoleEndpoint, NamespaceDiscovery{Names: []string{"ns1", "ns2"}}, objs...) k8sDiscoveryTest{ discovery: n, expectedMaxItems: 2, expectedRes: map[string]*targetgroup.Group{ "endpoints/ns1/testendpoints": { Targets: []model.LabelSet{ { "__address__": "1.2.3.4:9000", "__meta_kubernetes_endpoint_hostname": "testendpoint1", "__meta_kubernetes_endpoint_node_name": "foobar", "__meta_kubernetes_endpoint_port_name": "testport", "__meta_kubernetes_endpoint_port_protocol": "TCP", "__meta_kubernetes_endpoint_ready": "true", }, { "__address__": "2.3.4.5:9001", "__meta_kubernetes_endpoint_port_name": "testport", "__meta_kubernetes_endpoint_port_protocol": "TCP", "__meta_kubernetes_endpoint_ready": "true", }, { "__address__": "2.3.4.5:9001", "__meta_kubernetes_endpoint_port_name": "testport", "__meta_kubernetes_endpoint_port_protocol": "TCP", "__meta_kubernetes_endpoint_ready": "false", }, }, Labels: model.LabelSet{ "__meta_kubernetes_namespace": "ns1", "__meta_kubernetes_endpoints_name": "testendpoints", "__meta_kubernetes_service_label_app": "app1", "__meta_kubernetes_service_labelpresent_app": "true", "__meta_kubernetes_service_name": "testendpoints", }, Source: "endpoints/ns1/testendpoints", }, "endpoints/ns2/testendpoints": { Targets: []model.LabelSet{ { "__address__": "4.3.2.1:9000", "__meta_kubernetes_endpoint_port_name": "testport", "__meta_kubernetes_endpoint_port_protocol": "TCP", "__meta_kubernetes_endpoint_ready": "true", "__meta_kubernetes_endpoint_address_target_kind": "Pod", "__meta_kubernetes_endpoint_address_target_name": "testpod", "__meta_kubernetes_pod_name": "testpod", "__meta_kubernetes_pod_ip": "4.3.2.1", "__meta_kubernetes_pod_ready": "unknown", "__meta_kubernetes_pod_phase": "", "__meta_kubernetes_pod_node_name": "testnode", "__meta_kubernetes_pod_host_ip": "2.3.4.5", "__meta_kubernetes_pod_container_name": "c1", "__meta_kubernetes_pod_container_port_name": "mainport", "__meta_kubernetes_pod_container_port_number": "9000", "__meta_kubernetes_pod_container_port_protocol": "TCP", "__meta_kubernetes_pod_uid": "deadbeef", }, }, Labels: model.LabelSet{ "__meta_kubernetes_namespace": "ns2", "__meta_kubernetes_endpoints_name": "testendpoints", }, Source: "endpoints/ns2/testendpoints", }, }, }.Run(t) } prometheus-2.15.2+ds/discovery/kubernetes/ingress.go000066400000000000000000000135141360540074000225600ustar00rootroot00000000000000// Copyright 2016 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); 
// you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package kubernetes import ( "context" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" "github.com/pkg/errors" "github.com/prometheus/common/model" "k8s.io/api/extensions/v1beta1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/util/strutil" ) // Ingress implements discovery of Kubernetes ingress. type Ingress struct { logger log.Logger informer cache.SharedInformer store cache.Store queue *workqueue.Type } // NewIngress returns a new ingress discovery. func NewIngress(l log.Logger, inf cache.SharedInformer) *Ingress { s := &Ingress{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("ingress")} s.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(o interface{}) { eventCount.WithLabelValues("ingress", "add").Inc() s.enqueue(o) }, DeleteFunc: func(o interface{}) { eventCount.WithLabelValues("ingress", "delete").Inc() s.enqueue(o) }, UpdateFunc: func(_, o interface{}) { eventCount.WithLabelValues("ingress", "update").Inc() s.enqueue(o) }, }) return s } func (i *Ingress) enqueue(obj interface{}) { key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) if err != nil { return } i.queue.Add(key) } // Run implements the Discoverer interface. func (i *Ingress) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { defer i.queue.ShutDown() if !cache.WaitForCacheSync(ctx.Done(), i.informer.HasSynced) { if ctx.Err() != context.Canceled { level.Error(i.logger).Log("msg", "ingress informer unable to sync cache") } return } go func() { for i.process(ctx, ch) { } }() // Block until the target provider is explicitly canceled. 
<-ctx.Done() } func (i *Ingress) process(ctx context.Context, ch chan<- []*targetgroup.Group) bool { keyObj, quit := i.queue.Get() if quit { return false } defer i.queue.Done(keyObj) key := keyObj.(string) namespace, name, err := cache.SplitMetaNamespaceKey(key) if err != nil { return true } o, exists, err := i.store.GetByKey(key) if err != nil { return true } if !exists { send(ctx, i.logger, RoleIngress, ch, &targetgroup.Group{Source: ingressSourceFromNamespaceAndName(namespace, name)}) return true } eps, err := convertToIngress(o) if err != nil { level.Error(i.logger).Log("msg", "converting to Ingress object failed", "err", err) return true } send(ctx, i.logger, RoleIngress, ch, i.buildIngress(eps)) return true } func convertToIngress(o interface{}) (*v1beta1.Ingress, error) { ingress, ok := o.(*v1beta1.Ingress) if ok { return ingress, nil } return nil, errors.Errorf("received unexpected object: %v", o) } func ingressSource(s *v1beta1.Ingress) string { return ingressSourceFromNamespaceAndName(s.Namespace, s.Name) } func ingressSourceFromNamespaceAndName(namespace, name string) string { return "ingress/" + namespace + "/" + name } const ( ingressNameLabel = metaLabelPrefix + "ingress_name" ingressLabelPrefix = metaLabelPrefix + "ingress_label_" ingressLabelPresentPrefix = metaLabelPrefix + "ingress_labelpresent_" ingressAnnotationPrefix = metaLabelPrefix + "ingress_annotation_" ingressAnnotationPresentPrefix = metaLabelPrefix + "ingress_annotationpresent_" ingressSchemeLabel = metaLabelPrefix + "ingress_scheme" ingressHostLabel = metaLabelPrefix + "ingress_host" ingressPathLabel = metaLabelPrefix + "ingress_path" ) func ingressLabels(ingress *v1beta1.Ingress) model.LabelSet { // Each label and annotation will create two key-value pairs in the map. 
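// (one for the value and one for the "present" marker), plus two fixed entries for the ingress // name and namespace, which is what the capacity hint below accounts for. 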
ls := make(model.LabelSet, 2*(len(ingress.Labels)+len(ingress.Annotations))+2) ls[ingressNameLabel] = lv(ingress.Name) ls[namespaceLabel] = lv(ingress.Namespace) for k, v := range ingress.Labels { ln := strutil.SanitizeLabelName(k) ls[model.LabelName(ingressLabelPrefix+ln)] = lv(v) ls[model.LabelName(ingressLabelPresentPrefix+ln)] = presentValue } for k, v := range ingress.Annotations { ln := strutil.SanitizeLabelName(k) ls[model.LabelName(ingressAnnotationPrefix+ln)] = lv(v) ls[model.LabelName(ingressAnnotationPresentPrefix+ln)] = presentValue } return ls } func pathsFromIngressRule(rv *v1beta1.IngressRuleValue) []string { if rv.HTTP == nil { return []string{"/"} } paths := make([]string, len(rv.HTTP.Paths)) for n, p := range rv.HTTP.Paths { path := p.Path if path == "" { path = "/" } paths[n] = path } return paths } func (i *Ingress) buildIngress(ingress *v1beta1.Ingress) *targetgroup.Group { tg := &targetgroup.Group{ Source: ingressSource(ingress), } tg.Labels = ingressLabels(ingress) tlsHosts := make(map[string]struct{}) for _, tls := range ingress.Spec.TLS { for _, host := range tls.Hosts { tlsHosts[host] = struct{}{} } } for _, rule := range ingress.Spec.Rules { paths := pathsFromIngressRule(&rule.IngressRuleValue) scheme := "http" _, isTLS := tlsHosts[rule.Host] if isTLS { scheme = "https" } for _, path := range paths { tg.Targets = append(tg.Targets, model.LabelSet{ model.AddressLabel: lv(rule.Host), ingressSchemeLabel: lv(scheme), ingressHostLabel: lv(rule.Host), ingressPathLabel: lv(path), }) } } return tg } prometheus-2.15.2+ds/discovery/kubernetes/ingress_test.go000066400000000000000000000124751360540074000236240ustar00rootroot00000000000000// Copyright 2016 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package kubernetes import ( "fmt" "testing" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/targetgroup" "k8s.io/api/extensions/v1beta1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) type TLSMode int const ( TLSNo TLSMode = iota TLSYes TLSMixed ) func makeIngress(tls TLSMode) *v1beta1.Ingress { ret := &v1beta1.Ingress{ ObjectMeta: metav1.ObjectMeta{ Name: "testingress", Namespace: "default", Labels: map[string]string{"test/label": "testvalue"}, Annotations: map[string]string{"test/annotation": "testannotationvalue"}, }, Spec: v1beta1.IngressSpec{ TLS: nil, Rules: []v1beta1.IngressRule{ { Host: "example.com", IngressRuleValue: v1beta1.IngressRuleValue{ HTTP: &v1beta1.HTTPIngressRuleValue{ Paths: []v1beta1.HTTPIngressPath{ {Path: "/"}, {Path: "/foo"}, }, }, }, }, { // No backend config, ignored Host: "nobackend.example.com", IngressRuleValue: v1beta1.IngressRuleValue{ HTTP: &v1beta1.HTTPIngressRuleValue{}, }, }, { Host: "test.example.com", IngressRuleValue: v1beta1.IngressRuleValue{ HTTP: &v1beta1.HTTPIngressRuleValue{ Paths: []v1beta1.HTTPIngressPath{{}}, }, }, }, }, }, } switch tls { case TLSYes: ret.Spec.TLS = []v1beta1.IngressTLS{{Hosts: []string{"example.com", "test.example.com"}}} case TLSMixed: ret.Spec.TLS = []v1beta1.IngressTLS{{Hosts: []string{"example.com"}}} } return ret } func expectedTargetGroups(ns string, tls TLSMode) map[string]*targetgroup.Group { scheme1 := "http" scheme2 := "http" switch tls { case TLSYes: scheme1 = "https" scheme2 = "https" case TLSMixed: scheme1 = "https" } key := fmt.Sprintf("ingress/%s/testingress", ns) return map[string]*targetgroup.Group{ key: { Targets: []model.LabelSet{ { "__meta_kubernetes_ingress_scheme": lv(scheme1), "__meta_kubernetes_ingress_host": "example.com", "__meta_kubernetes_ingress_path": "/", "__address__": "example.com", }, { "__meta_kubernetes_ingress_scheme": lv(scheme1), "__meta_kubernetes_ingress_host": "example.com", "__meta_kubernetes_ingress_path": "/foo", "__address__": "example.com", }, { "__meta_kubernetes_ingress_scheme": lv(scheme2), "__meta_kubernetes_ingress_host": "test.example.com", "__address__": "test.example.com", "__meta_kubernetes_ingress_path": "/", }, }, Labels: model.LabelSet{ "__meta_kubernetes_ingress_name": "testingress", "__meta_kubernetes_namespace": lv(ns), "__meta_kubernetes_ingress_label_test_label": "testvalue", "__meta_kubernetes_ingress_labelpresent_test_label": "true", "__meta_kubernetes_ingress_annotation_test_annotation": "testannotationvalue", "__meta_kubernetes_ingress_annotationpresent_test_annotation": "true", }, Source: key, }, } } func TestIngressDiscoveryAdd(t *testing.T) { n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}) k8sDiscoveryTest{ discovery: n, afterStart: func() { obj := makeIngress(TLSNo) c.ExtensionsV1beta1().Ingresses("default").Create(obj) }, expectedMaxItems: 1, expectedRes: expectedTargetGroups("default", TLSNo), }.Run(t) } func TestIngressDiscoveryAddTLS(t *testing.T) { n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}) k8sDiscoveryTest{ discovery: n, afterStart: func() { obj := makeIngress(TLSYes) c.ExtensionsV1beta1().Ingresses("default").Create(obj) }, expectedMaxItems: 1, expectedRes: expectedTargetGroups("default", TLSYes), }.Run(t) } func TestIngressDiscoveryAddMixed(t *testing.T) { n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}) k8sDiscoveryTest{ discovery: n, afterStart: func() { obj := makeIngress(TLSMixed) 
c.ExtensionsV1beta1().Ingresses("default").Create(obj) }, expectedMaxItems: 1, expectedRes: expectedTargetGroups("default", TLSMixed), }.Run(t) } func TestIngressDiscoveryNamespaces(t *testing.T) { n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"ns1", "ns2"}}) expected := expectedTargetGroups("ns1", TLSNo) for k, v := range expectedTargetGroups("ns2", TLSNo) { expected[k] = v } k8sDiscoveryTest{ discovery: n, afterStart: func() { for _, ns := range []string{"ns1", "ns2"} { obj := makeIngress(TLSNo) obj.Namespace = ns c.ExtensionsV1beta1().Ingresses(obj.Namespace).Create(obj) } }, expectedMaxItems: 2, expectedRes: expected, }.Run(t) } prometheus-2.15.2+ds/discovery/kubernetes/kubernetes.go000066400000000000000000000246731360540074000232650ustar00rootroot00000000000000// Copyright 2016 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package kubernetes import ( "context" "reflect" "sync" "time" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" apiv1 "k8s.io/api/core/v1" extensionsv1beta1 "k8s.io/api/extensions/v1beta1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/watch" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/client-go/tools/cache" "github.com/prometheus/prometheus/discovery/targetgroup" ) const ( // kubernetesMetaLabelPrefix is the meta prefix used for all meta labels. // in this discovery. metaLabelPrefix = model.MetaLabelPrefix + "kubernetes_" namespaceLabel = metaLabelPrefix + "namespace" metricsNamespace = "prometheus_sd_kubernetes" presentValue = model.LabelValue("true") ) var ( // Custom events metric eventCount = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: metricsNamespace, Name: "events_total", Help: "The number of Kubernetes events handled.", }, []string{"role", "event"}, ) // DefaultSDConfig is the default Kubernetes SD configuration DefaultSDConfig = SDConfig{} ) // Role is role of the service in Kubernetes. type Role string // The valid options for Role. const ( RoleNode Role = "node" RolePod Role = "pod" RoleService Role = "service" RoleEndpoint Role = "endpoints" RoleIngress Role = "ingress" ) // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *Role) UnmarshalYAML(unmarshal func(interface{}) error) error { if err := unmarshal((*string)(c)); err != nil { return err } switch *c { case RoleNode, RolePod, RoleService, RoleEndpoint, RoleIngress: return nil default: return errors.Errorf("unknown Kubernetes SD role %q", *c) } } // SDConfig is the configuration for Kubernetes service discovery. 
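// Leaving APIServer empty selects the in-cluster service-account configuration; custom HTTP // client settings are only accepted together with an explicit api_server URL. 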
type SDConfig struct { APIServer config_util.URL `yaml:"api_server,omitempty"` Role Role `yaml:"role"` HTTPClientConfig config_util.HTTPClientConfig `yaml:",inline"` NamespaceDiscovery NamespaceDiscovery `yaml:"namespaces,omitempty"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = SDConfig{} type plain SDConfig err := unmarshal((*plain)(c)) if err != nil { return err } if c.Role == "" { return errors.Errorf("role missing (one of: pod, service, endpoints, node, ingress)") } err = c.HTTPClientConfig.Validate() if err != nil { return err } if c.APIServer.URL == nil && !reflect.DeepEqual(c.HTTPClientConfig, config_util.HTTPClientConfig{}) { return errors.Errorf("to use custom HTTP client configuration please provide the 'api_server' URL explicitly") } return nil } // NamespaceDiscovery is the configuration for discovering // Kubernetes namespaces. type NamespaceDiscovery struct { Names []string `yaml:"names"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *NamespaceDiscovery) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = NamespaceDiscovery{} type plain NamespaceDiscovery return unmarshal((*plain)(c)) } func init() { prometheus.MustRegister(eventCount) // Initialize metric vectors. for _, role := range []string{"endpoints", "node", "pod", "service", "ingress"} { for _, evt := range []string{"add", "delete", "update"} { eventCount.WithLabelValues(role, evt) } } var ( clientGoRequestMetricAdapterInstance = clientGoRequestMetricAdapter{} clientGoWorkqueueMetricsProviderInstance = clientGoWorkqueueMetricsProvider{} ) clientGoRequestMetricAdapterInstance.Register(prometheus.DefaultRegisterer) clientGoWorkqueueMetricsProviderInstance.Register(prometheus.DefaultRegisterer) } // This is only for internal use. type discoverer interface { Run(ctx context.Context, up chan<- []*targetgroup.Group) } // Discovery implements the discoverer interface for discovering // targets from Kubernetes. type Discovery struct { sync.RWMutex client kubernetes.Interface role Role logger log.Logger namespaceDiscovery *NamespaceDiscovery discoverers []discoverer } func (d *Discovery) getNamespaces() []string { namespaces := d.namespaceDiscovery.Names if len(namespaces) == 0 { namespaces = []string{apiv1.NamespaceAll} } return namespaces } // New creates a new Kubernetes discovery for the given role. func New(l log.Logger, conf *SDConfig) (*Discovery, error) { if l == nil { l = log.NewNopLogger() } var ( kcfg *rest.Config err error ) if conf.APIServer.URL == nil { // Use the Kubernetes provided pod service account // as described in https://kubernetes.io/docs/admin/service-accounts-admin/ kcfg, err = rest.InClusterConfig() if err != nil { return nil, err } level.Info(l).Log("msg", "Using pod service account via in-cluster config") } else { rt, err := config_util.NewRoundTripperFromConfig(conf.HTTPClientConfig, "kubernetes_sd", false) if err != nil { return nil, err } kcfg = &rest.Config{ Host: conf.APIServer.String(), Transport: rt, } } kcfg.UserAgent = "Prometheus/discovery" c, err := kubernetes.NewForConfig(kcfg) if err != nil { return nil, err } return &Discovery{ client: c, logger: l, role: conf.Role, namespaceDiscovery: &conf.NamespaceDiscovery, discoverers: make([]discoverer, 0), }, nil } const resyncPeriod = 10 * time.Minute // Run implements the discoverer interface. 
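// It starts one informer-backed discoverer per configured namespace for the selected role // (nodes are not namespaced) and blocks until the context is canceled. 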
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { d.Lock() namespaces := d.getNamespaces() switch d.role { case RoleEndpoint: for _, namespace := range namespaces { e := d.client.CoreV1().Endpoints(namespace) elw := &cache.ListWatch{ ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { return e.List(options) }, WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { return e.Watch(options) }, } s := d.client.CoreV1().Services(namespace) slw := &cache.ListWatch{ ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { return s.List(options) }, WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { return s.Watch(options) }, } p := d.client.CoreV1().Pods(namespace) plw := &cache.ListWatch{ ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { return p.List(options) }, WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { return p.Watch(options) }, } eps := NewEndpoints( log.With(d.logger, "role", "endpoint"), cache.NewSharedInformer(slw, &apiv1.Service{}, resyncPeriod), cache.NewSharedInformer(elw, &apiv1.Endpoints{}, resyncPeriod), cache.NewSharedInformer(plw, &apiv1.Pod{}, resyncPeriod), ) d.discoverers = append(d.discoverers, eps) go eps.endpointsInf.Run(ctx.Done()) go eps.serviceInf.Run(ctx.Done()) go eps.podInf.Run(ctx.Done()) } case RolePod: for _, namespace := range namespaces { p := d.client.CoreV1().Pods(namespace) plw := &cache.ListWatch{ ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { return p.List(options) }, WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { return p.Watch(options) }, } pod := NewPod( log.With(d.logger, "role", "pod"), cache.NewSharedInformer(plw, &apiv1.Pod{}, resyncPeriod), ) d.discoverers = append(d.discoverers, pod) go pod.informer.Run(ctx.Done()) } case RoleService: for _, namespace := range namespaces { s := d.client.CoreV1().Services(namespace) slw := &cache.ListWatch{ ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { return s.List(options) }, WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { return s.Watch(options) }, } svc := NewService( log.With(d.logger, "role", "service"), cache.NewSharedInformer(slw, &apiv1.Service{}, resyncPeriod), ) d.discoverers = append(d.discoverers, svc) go svc.informer.Run(ctx.Done()) } case RoleIngress: for _, namespace := range namespaces { i := d.client.ExtensionsV1beta1().Ingresses(namespace) ilw := &cache.ListWatch{ ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { return i.List(options) }, WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { return i.Watch(options) }, } ingress := NewIngress( log.With(d.logger, "role", "ingress"), cache.NewSharedInformer(ilw, &extensionsv1beta1.Ingress{}, resyncPeriod), ) d.discoverers = append(d.discoverers, ingress) go ingress.informer.Run(ctx.Done()) } case RoleNode: nlw := &cache.ListWatch{ ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { return d.client.CoreV1().Nodes().List(options) }, WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { return d.client.CoreV1().Nodes().Watch(options) }, } node := NewNode( log.With(d.logger, "role", "node"), cache.NewSharedInformer(nlw, &apiv1.Node{}, resyncPeriod), ) d.discoverers = append(d.discoverers, node) go node.informer.Run(ctx.Done()) default: level.Error(d.logger).Log("msg", "unknown Kubernetes discovery kind", "role", d.role) } var wg sync.WaitGroup for _, dd := 
range d.discoverers { wg.Add(1) go func(d discoverer) { defer wg.Done() d.Run(ctx, ch) }(dd) } d.Unlock() wg.Wait() <-ctx.Done() } func lv(s string) model.LabelValue { return model.LabelValue(s) } func send(ctx context.Context, l log.Logger, role Role, ch chan<- []*targetgroup.Group, tg *targetgroup.Group) { if tg == nil { return } select { case <-ctx.Done(): case ch <- []*targetgroup.Group{tg}: } } prometheus-2.15.2+ds/discovery/kubernetes/kubernetes_test.go000066400000000000000000000115601360540074000243130ustar00rootroot00000000000000// Copyright 2018 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package kubernetes import ( "context" "encoding/json" "testing" "time" "github.com/go-kit/kit/log" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/util/testutil" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/tools/cache" ) // makeDiscovery creates a kubernetes.Discovery instance for testing. func makeDiscovery(role Role, nsDiscovery NamespaceDiscovery, objects ...runtime.Object) (*Discovery, kubernetes.Interface) { clientset := fake.NewSimpleClientset(objects...) return &Discovery{ client: clientset, logger: log.NewNopLogger(), role: role, namespaceDiscovery: &nsDiscovery, }, clientset } type k8sDiscoveryTest struct { // discovery is instance of discovery.Discoverer discovery discoverer // beforeRun runs before discoverer run beforeRun func() // afterStart runs after discoverer has synced afterStart func() // expectedMaxItems is expected max items we may get from channel expectedMaxItems int // expectedRes is expected final result expectedRes map[string]*targetgroup.Group } func (d k8sDiscoveryTest) Run(t *testing.T) { t.Helper() ch := make(chan []*targetgroup.Group) ctx, cancel := context.WithTimeout(context.Background(), time.Minute) defer cancel() if d.beforeRun != nil { d.beforeRun() } // Run discoverer and start a goroutine to read results. go d.discovery.Run(ctx, ch) resChan := make(chan map[string]*targetgroup.Group) go readResultWithTimeout(t, ch, d.expectedMaxItems, time.Second, resChan) dd, ok := d.discovery.(hasSynced) if !ok { t.Errorf("discoverer does not implement hasSynced interface") return } if !cache.WaitForCacheSync(ctx.Done(), dd.hasSynced) { t.Errorf("discoverer failed to sync: %v", dd) return } if d.afterStart != nil { d.afterStart() } if d.expectedRes != nil { res := <-resChan requireTargetGroups(t, d.expectedRes, res) } } // readResultWithTimeout reads all targegroups from channel with timeout. // It merges targegroups by source and sends the result to result channel. func readResultWithTimeout(t *testing.T, ch <-chan []*targetgroup.Group, max int, timeout time.Duration, resChan chan<- map[string]*targetgroup.Group) { allTgs := make([][]*targetgroup.Group, 0) Loop: for { select { case tgs := <-ch: allTgs = append(allTgs, tgs) if len(allTgs) == max { // Reached max target groups we may get, break fast. 
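// (The batches received here are merged by source below, so reaching 'max'
// batches does not necessarily mean 'max' distinct target groups in the
// final result.)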
break Loop } case <-time.After(timeout): // Because we use queue, an object that is created then // deleted or updated may be processed only once. // So possibly we may skip events, timed out here. t.Logf("timed out, got %d (max: %d) items, some events are skipped", len(allTgs), max) break Loop } } // Merge by source and sent it to channel. res := make(map[string]*targetgroup.Group) for _, tgs := range allTgs { for _, tg := range tgs { if tg == nil { continue } res[tg.Source] = tg } } resChan <- res } func requireTargetGroups(t *testing.T, expected, res map[string]*targetgroup.Group) { t.Helper() b1, err := json.Marshal(expected) if err != nil { panic(err) } b2, err := json.Marshal(res) if err != nil { panic(err) } testutil.Equals(t, string(b1), string(b2)) } type hasSynced interface { // hasSynced returns true if all informers synced. // This is only used in testing to determine when discoverer synced to // kubernetes apiserver. hasSynced() bool } var _ hasSynced = &Discovery{} var _ hasSynced = &Node{} var _ hasSynced = &Endpoints{} var _ hasSynced = &Ingress{} var _ hasSynced = &Pod{} var _ hasSynced = &Service{} func (d *Discovery) hasSynced() bool { d.RLock() defer d.RUnlock() for _, discoverer := range d.discoverers { if hasSynceddiscoverer, ok := discoverer.(hasSynced); ok { if !hasSynceddiscoverer.hasSynced() { return false } } } return true } func (n *Node) hasSynced() bool { return n.informer.HasSynced() } func (e *Endpoints) hasSynced() bool { return e.endpointsInf.HasSynced() && e.serviceInf.HasSynced() && e.podInf.HasSynced() } func (i *Ingress) hasSynced() bool { return i.informer.HasSynced() } func (p *Pod) hasSynced() bool { return p.informer.HasSynced() } func (s *Service) hasSynced() bool { return s.informer.HasSynced() } prometheus-2.15.2+ds/discovery/kubernetes/node.go000066400000000000000000000142031360540074000220270ustar00rootroot00000000000000// Copyright 2016 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package kubernetes import ( "context" "net" "strconv" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" "github.com/pkg/errors" "github.com/prometheus/common/model" apiv1 "k8s.io/api/core/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/util/strutil" ) const ( NodeLegacyHostIP = "LegacyHostIP" ) // Node discovers Kubernetes nodes. type Node struct { logger log.Logger informer cache.SharedInformer store cache.Store queue *workqueue.Type } // NewNode returns a new node discovery. 
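// It registers add, update and delete handlers on the shared informer; each
// handler increments the corresponding eventCount metric and enqueues the
// object's key onto a work queue that Run drains.
//
// A minimal wiring sketch, mirroring what Discovery.Run does for the node
// role (client, logger, ctx and ch are assumed to exist):
//
//	nlw := &cache.ListWatch{
//		ListFunc:  func(options metav1.ListOptions) (runtime.Object, error) { return client.CoreV1().Nodes().List(options) },
//		WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { return client.CoreV1().Nodes().Watch(options) },
//	}
//	n := NewNode(logger, cache.NewSharedInformer(nlw, &apiv1.Node{}, resyncPeriod))
//	go n.informer.Run(ctx.Done())
//	go n.Run(ctx, ch)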
func NewNode(l log.Logger, inf cache.SharedInformer) *Node { if l == nil { l = log.NewNopLogger() } n := &Node{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("node")} n.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(o interface{}) { eventCount.WithLabelValues("node", "add").Inc() n.enqueue(o) }, DeleteFunc: func(o interface{}) { eventCount.WithLabelValues("node", "delete").Inc() n.enqueue(o) }, UpdateFunc: func(_, o interface{}) { eventCount.WithLabelValues("node", "update").Inc() n.enqueue(o) }, }) return n } func (n *Node) enqueue(obj interface{}) { key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) if err != nil { return } n.queue.Add(key) } // Run implements the Discoverer interface. func (n *Node) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { defer n.queue.ShutDown() if !cache.WaitForCacheSync(ctx.Done(), n.informer.HasSynced) { if ctx.Err() != context.Canceled { level.Error(n.logger).Log("msg", "node informer unable to sync cache") } return } go func() { for n.process(ctx, ch) { } }() // Block until the target provider is explicitly canceled. <-ctx.Done() } func (n *Node) process(ctx context.Context, ch chan<- []*targetgroup.Group) bool { keyObj, quit := n.queue.Get() if quit { return false } defer n.queue.Done(keyObj) key := keyObj.(string) _, name, err := cache.SplitMetaNamespaceKey(key) if err != nil { return true } o, exists, err := n.store.GetByKey(key) if err != nil { return true } if !exists { send(ctx, n.logger, RoleNode, ch, &targetgroup.Group{Source: nodeSourceFromName(name)}) return true } node, err := convertToNode(o) if err != nil { level.Error(n.logger).Log("msg", "converting to Node object failed", "err", err) return true } send(ctx, n.logger, RoleNode, ch, n.buildNode(node)) return true } func convertToNode(o interface{}) (*apiv1.Node, error) { node, ok := o.(*apiv1.Node) if ok { return node, nil } return nil, errors.Errorf("received unexpected object: %v", o) } func nodeSource(n *apiv1.Node) string { return nodeSourceFromName(n.Name) } func nodeSourceFromName(name string) string { return "node/" + name } const ( nodeNameLabel = metaLabelPrefix + "node_name" nodeLabelPrefix = metaLabelPrefix + "node_label_" nodeLabelPresentPrefix = metaLabelPrefix + "node_labelpresent_" nodeAnnotationPrefix = metaLabelPrefix + "node_annotation_" nodeAnnotationPresentPrefix = metaLabelPrefix + "node_annotationpresent_" nodeAddressPrefix = metaLabelPrefix + "node_address_" ) func nodeLabels(n *apiv1.Node) model.LabelSet { // Each label and annotation will create two key-value pairs in the map. 
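// (the value itself plus the corresponding *_labelpresent_ or
// *_annotationpresent_ marker), and one extra slot is reserved for the node
// name label, so the capacity below is exact.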
ls := make(model.LabelSet, 2*(len(n.Labels)+len(n.Annotations))+1) ls[nodeNameLabel] = lv(n.Name) for k, v := range n.Labels { ln := strutil.SanitizeLabelName(k) ls[model.LabelName(nodeLabelPrefix+ln)] = lv(v) ls[model.LabelName(nodeLabelPresentPrefix+ln)] = presentValue } for k, v := range n.Annotations { ln := strutil.SanitizeLabelName(k) ls[model.LabelName(nodeAnnotationPrefix+ln)] = lv(v) ls[model.LabelName(nodeAnnotationPresentPrefix+ln)] = presentValue } return ls } func (n *Node) buildNode(node *apiv1.Node) *targetgroup.Group { tg := &targetgroup.Group{ Source: nodeSource(node), } tg.Labels = nodeLabels(node) addr, addrMap, err := nodeAddress(node) if err != nil { level.Warn(n.logger).Log("msg", "No node address found", "err", err) return nil } addr = net.JoinHostPort(addr, strconv.FormatInt(int64(node.Status.DaemonEndpoints.KubeletEndpoint.Port), 10)) t := model.LabelSet{ model.AddressLabel: lv(addr), model.InstanceLabel: lv(node.Name), } for ty, a := range addrMap { ln := strutil.SanitizeLabelName(nodeAddressPrefix + string(ty)) t[model.LabelName(ln)] = lv(a[0]) } tg.Targets = append(tg.Targets, t) return tg } // nodeAddresses returns the provided node's address, based on the priority: // 1. NodeInternalIP // 2. NodeInternalDNS // 3. NodeExternalIP // 4. NodeExternalDNS // 5. NodeLegacyHostIP // 6. NodeHostName // // Derived from k8s.io/kubernetes/pkg/util/node/node.go func nodeAddress(node *apiv1.Node) (string, map[apiv1.NodeAddressType][]string, error) { m := map[apiv1.NodeAddressType][]string{} for _, a := range node.Status.Addresses { m[a.Type] = append(m[a.Type], a.Address) } if addresses, ok := m[apiv1.NodeInternalIP]; ok { return addresses[0], m, nil } if addresses, ok := m[apiv1.NodeInternalDNS]; ok { return addresses[0], m, nil } if addresses, ok := m[apiv1.NodeExternalIP]; ok { return addresses[0], m, nil } if addresses, ok := m[apiv1.NodeExternalDNS]; ok { return addresses[0], m, nil } if addresses, ok := m[apiv1.NodeAddressType(NodeLegacyHostIP)]; ok { return addresses[0], m, nil } if addresses, ok := m[apiv1.NodeHostName]; ok { return addresses[0], m, nil } return "", m, errors.New("host address unknown") } prometheus-2.15.2+ds/discovery/kubernetes/node_test.go000066400000000000000000000106621360540074000230730ustar00rootroot00000000000000// Copyright 2016 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
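// Illustrative sketch of the address selection in nodeAddress (node.go above):
// for a node that reports both an external and an internal IP (hypothetical
// values), the internal address wins while the full address map is still
// returned alongside it:
//
//	node.Status.Addresses = []apiv1.NodeAddress{
//		{Type: apiv1.NodeExternalIP, Address: "203.0.113.7"},
//		{Type: apiv1.NodeInternalIP, Address: "10.0.0.1"},
//	}
//	addr, addrMap, err := nodeAddress(node)
//	// addr == "10.0.0.1", err == nil
//	// addrMap[apiv1.NodeExternalIP] == []string{"203.0.113.7"}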
package kubernetes import ( "fmt" "testing" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/targetgroup" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) func makeNode(name, address string, labels map[string]string, annotations map[string]string) *v1.Node { return &v1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: name, Labels: labels, Annotations: annotations, }, Status: v1.NodeStatus{ Addresses: []v1.NodeAddress{ { Type: v1.NodeInternalIP, Address: address, }, }, DaemonEndpoints: v1.NodeDaemonEndpoints{ KubeletEndpoint: v1.DaemonEndpoint{ Port: 10250, }, }, }, } } func makeEnumeratedNode(i int) *v1.Node { return makeNode(fmt.Sprintf("test%d", i), "1.2.3.4", map[string]string{}, map[string]string{}) } func TestNodeDiscoveryBeforeStart(t *testing.T) { n, c := makeDiscovery(RoleNode, NamespaceDiscovery{}) k8sDiscoveryTest{ discovery: n, beforeRun: func() { obj := makeNode( "test", "1.2.3.4", map[string]string{"test-label": "testvalue"}, map[string]string{"test-annotation": "testannotationvalue"}, ) c.CoreV1().Nodes().Create(obj) }, expectedMaxItems: 1, expectedRes: map[string]*targetgroup.Group{ "node/test": { Targets: []model.LabelSet{ { "__address__": "1.2.3.4:10250", "instance": "test", "__meta_kubernetes_node_address_InternalIP": "1.2.3.4", }, }, Labels: model.LabelSet{ "__meta_kubernetes_node_name": "test", "__meta_kubernetes_node_label_test_label": "testvalue", "__meta_kubernetes_node_labelpresent_test_label": "true", "__meta_kubernetes_node_annotation_test_annotation": "testannotationvalue", "__meta_kubernetes_node_annotationpresent_test_annotation": "true", }, Source: "node/test", }, }, }.Run(t) } func TestNodeDiscoveryAdd(t *testing.T) { n, c := makeDiscovery(RoleNode, NamespaceDiscovery{}) k8sDiscoveryTest{ discovery: n, afterStart: func() { obj := makeEnumeratedNode(1) c.CoreV1().Nodes().Create(obj) }, expectedMaxItems: 1, expectedRes: map[string]*targetgroup.Group{ "node/test1": { Targets: []model.LabelSet{ { "__address__": "1.2.3.4:10250", "instance": "test1", "__meta_kubernetes_node_address_InternalIP": "1.2.3.4", }, }, Labels: model.LabelSet{ "__meta_kubernetes_node_name": "test1", }, Source: "node/test1", }, }, }.Run(t) } func TestNodeDiscoveryDelete(t *testing.T) { obj := makeEnumeratedNode(0) n, c := makeDiscovery(RoleNode, NamespaceDiscovery{}, obj) k8sDiscoveryTest{ discovery: n, afterStart: func() { c.CoreV1().Nodes().Delete(obj.Name, &metav1.DeleteOptions{}) }, expectedMaxItems: 2, expectedRes: map[string]*targetgroup.Group{ "node/test0": { Source: "node/test0", }, }, }.Run(t) } func TestNodeDiscoveryUpdate(t *testing.T) { n, c := makeDiscovery(RoleNode, NamespaceDiscovery{}) k8sDiscoveryTest{ discovery: n, afterStart: func() { obj1 := makeEnumeratedNode(0) c.CoreV1().Nodes().Create(obj1) obj2 := makeNode( "test0", "1.2.3.4", map[string]string{"Unschedulable": "true"}, map[string]string{}, ) c.CoreV1().Nodes().Update(obj2) }, expectedMaxItems: 2, expectedRes: map[string]*targetgroup.Group{ "node/test0": { Targets: []model.LabelSet{ { "__address__": "1.2.3.4:10250", "instance": "test0", "__meta_kubernetes_node_address_InternalIP": "1.2.3.4", }, }, Labels: model.LabelSet{ "__meta_kubernetes_node_label_Unschedulable": "true", "__meta_kubernetes_node_labelpresent_Unschedulable": "true", "__meta_kubernetes_node_name": "test0", }, Source: "node/test0", }, }, }.Run(t) } prometheus-2.15.2+ds/discovery/kubernetes/pod.go000066400000000000000000000176031360540074000216730ustar00rootroot00000000000000// Copyright 2016 The 
Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package kubernetes import ( "context" "net" "strconv" "strings" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" "github.com/pkg/errors" "github.com/prometheus/common/model" apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/util/strutil" ) // Pod discovers new pod targets. type Pod struct { informer cache.SharedInformer store cache.Store logger log.Logger queue *workqueue.Type } // NewPod creates a new pod discovery. func NewPod(l log.Logger, pods cache.SharedInformer) *Pod { if l == nil { l = log.NewNopLogger() } p := &Pod{ informer: pods, store: pods.GetStore(), logger: l, queue: workqueue.NewNamed("pod"), } p.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(o interface{}) { eventCount.WithLabelValues("pod", "add").Inc() p.enqueue(o) }, DeleteFunc: func(o interface{}) { eventCount.WithLabelValues("pod", "delete").Inc() p.enqueue(o) }, UpdateFunc: func(_, o interface{}) { eventCount.WithLabelValues("pod", "update").Inc() p.enqueue(o) }, }) return p } func (p *Pod) enqueue(obj interface{}) { key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) if err != nil { return } p.queue.Add(key) } // Run implements the Discoverer interface. func (p *Pod) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { defer p.queue.ShutDown() if !cache.WaitForCacheSync(ctx.Done(), p.informer.HasSynced) { if ctx.Err() != context.Canceled { level.Error(p.logger).Log("msg", "pod informer unable to sync cache") } return } go func() { for p.process(ctx, ch) { } }() // Block until the target provider is explicitly canceled. 
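// When the context is canceled, the deferred queue.ShutDown above unblocks
// the pending Get inside process, which then returns false and ends the
// worker goroutine.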
<-ctx.Done() } func (p *Pod) process(ctx context.Context, ch chan<- []*targetgroup.Group) bool { keyObj, quit := p.queue.Get() if quit { return false } defer p.queue.Done(keyObj) key := keyObj.(string) namespace, name, err := cache.SplitMetaNamespaceKey(key) if err != nil { return true } o, exists, err := p.store.GetByKey(key) if err != nil { return true } if !exists { send(ctx, p.logger, RolePod, ch, &targetgroup.Group{Source: podSourceFromNamespaceAndName(namespace, name)}) return true } eps, err := convertToPod(o) if err != nil { level.Error(p.logger).Log("msg", "converting to Pod object failed", "err", err) return true } send(ctx, p.logger, RolePod, ch, p.buildPod(eps)) return true } func convertToPod(o interface{}) (*apiv1.Pod, error) { pod, ok := o.(*apiv1.Pod) if ok { return pod, nil } return nil, errors.Errorf("received unexpected object: %v", o) } const ( podNameLabel = metaLabelPrefix + "pod_name" podIPLabel = metaLabelPrefix + "pod_ip" podContainerNameLabel = metaLabelPrefix + "pod_container_name" podContainerPortNameLabel = metaLabelPrefix + "pod_container_port_name" podContainerPortNumberLabel = metaLabelPrefix + "pod_container_port_number" podContainerPortProtocolLabel = metaLabelPrefix + "pod_container_port_protocol" podContainerIsInit = metaLabelPrefix + "pod_container_init" podReadyLabel = metaLabelPrefix + "pod_ready" podPhaseLabel = metaLabelPrefix + "pod_phase" podLabelPrefix = metaLabelPrefix + "pod_label_" podLabelPresentPrefix = metaLabelPrefix + "pod_labelpresent_" podAnnotationPrefix = metaLabelPrefix + "pod_annotation_" podAnnotationPresentPrefix = metaLabelPrefix + "pod_annotationpresent_" podNodeNameLabel = metaLabelPrefix + "pod_node_name" podHostIPLabel = metaLabelPrefix + "pod_host_ip" podUID = metaLabelPrefix + "pod_uid" podControllerKind = metaLabelPrefix + "pod_controller_kind" podControllerName = metaLabelPrefix + "pod_controller_name" ) // GetControllerOf returns a pointer to a copy of the controllerRef if controllee has a controller // https://github.com/kubernetes/apimachinery/blob/cd2cae2b39fa57e8063fa1f5f13cfe9862db3d41/pkg/apis/meta/v1/controller_ref.go func GetControllerOf(controllee metav1.Object) *metav1.OwnerReference { for _, ref := range controllee.GetOwnerReferences() { if ref.Controller != nil && *ref.Controller { return &ref } } return nil } func podLabels(pod *apiv1.Pod) model.LabelSet { ls := model.LabelSet{ podNameLabel: lv(pod.ObjectMeta.Name), podIPLabel: lv(pod.Status.PodIP), podReadyLabel: podReady(pod), podPhaseLabel: lv(string(pod.Status.Phase)), podNodeNameLabel: lv(pod.Spec.NodeName), podHostIPLabel: lv(pod.Status.HostIP), podUID: lv(string(pod.ObjectMeta.UID)), } createdBy := GetControllerOf(pod) if createdBy != nil { if createdBy.Kind != "" { ls[podControllerKind] = lv(createdBy.Kind) } if createdBy.Name != "" { ls[podControllerName] = lv(createdBy.Name) } } for k, v := range pod.Labels { ln := strutil.SanitizeLabelName(k) ls[model.LabelName(podLabelPrefix+ln)] = lv(v) ls[model.LabelName(podLabelPresentPrefix+ln)] = presentValue } for k, v := range pod.Annotations { ln := strutil.SanitizeLabelName(k) ls[model.LabelName(podAnnotationPrefix+ln)] = lv(v) ls[model.LabelName(podAnnotationPresentPrefix+ln)] = presentValue } return ls } func (p *Pod) buildPod(pod *apiv1.Pod) *targetgroup.Group { tg := &targetgroup.Group{ Source: podSource(pod), } // PodIP can be empty when a pod is starting or has been evicted. 
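// In that case the group is returned with only its Source set and no targets,
// which tells downstream consumers to drop any targets previously discovered
// for this pod.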
if len(pod.Status.PodIP) == 0 { return tg } tg.Labels = podLabels(pod) tg.Labels[namespaceLabel] = lv(pod.Namespace) containers := append(pod.Spec.Containers, pod.Spec.InitContainers...) for i, c := range containers { isInit := i >= len(pod.Spec.Containers) // If no ports are defined for the container, create an anonymous // target per container. if len(c.Ports) == 0 { // We don't have a port so we just set the address label to the pod IP. // The user has to add a port manually. tg.Targets = append(tg.Targets, model.LabelSet{ model.AddressLabel: lv(pod.Status.PodIP), podContainerNameLabel: lv(c.Name), podContainerIsInit: lv(strconv.FormatBool(isInit)), }) continue } // Otherwise create one target for each container/port combination. for _, port := range c.Ports { ports := strconv.FormatUint(uint64(port.ContainerPort), 10) addr := net.JoinHostPort(pod.Status.PodIP, ports) tg.Targets = append(tg.Targets, model.LabelSet{ model.AddressLabel: lv(addr), podContainerNameLabel: lv(c.Name), podContainerPortNumberLabel: lv(ports), podContainerPortNameLabel: lv(port.Name), podContainerPortProtocolLabel: lv(string(port.Protocol)), podContainerIsInit: lv(strconv.FormatBool(isInit)), }) } } return tg } func podSource(pod *apiv1.Pod) string { return podSourceFromNamespaceAndName(pod.Namespace, pod.Name) } func podSourceFromNamespaceAndName(namespace, name string) string { return "pod/" + namespace + "/" + name } func podReady(pod *apiv1.Pod) model.LabelValue { for _, cond := range pod.Status.Conditions { if cond.Type == apiv1.PodReady { return lv(strings.ToLower(string(cond.Status))) } } return lv(strings.ToLower(string(apiv1.ConditionUnknown))) } prometheus-2.15.2+ds/discovery/kubernetes/pod_test.go000066400000000000000000000245141360540074000227310ustar00rootroot00000000000000// Copyright 2016 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package kubernetes import ( "fmt" "testing" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/targetgroup" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" ) func makeOptionalBool(v bool) *bool { return &v } func makeMultiPortPods() *v1.Pod { return &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "testpod", Namespace: "default", Labels: map[string]string{"test/label": "testvalue"}, Annotations: map[string]string{"test/annotation": "testannotationvalue"}, UID: types.UID("abc123"), OwnerReferences: []metav1.OwnerReference{ { Kind: "testcontrollerkind", Name: "testcontrollername", Controller: makeOptionalBool(true), }, }, }, Spec: v1.PodSpec{ NodeName: "testnode", Containers: []v1.Container{ { Name: "testcontainer0", Ports: []v1.ContainerPort{ { Name: "testport0", Protocol: v1.ProtocolTCP, ContainerPort: int32(9000), }, { Name: "testport1", Protocol: v1.ProtocolUDP, ContainerPort: int32(9001), }, }, }, { Name: "testcontainer1", }, }, }, Status: v1.PodStatus{ PodIP: "1.2.3.4", HostIP: "2.3.4.5", Phase: "Running", Conditions: []v1.PodCondition{ { Type: v1.PodReady, Status: v1.ConditionTrue, }, }, }, } } func makePods() *v1.Pod { return &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "testpod", Namespace: "default", UID: types.UID("abc123"), }, Spec: v1.PodSpec{ NodeName: "testnode", Containers: []v1.Container{ { Name: "testcontainer", Ports: []v1.ContainerPort{ { Name: "testport", Protocol: v1.ProtocolTCP, ContainerPort: int32(9000), }, }, }, }, }, Status: v1.PodStatus{ PodIP: "1.2.3.4", HostIP: "2.3.4.5", Phase: "Running", Conditions: []v1.PodCondition{ { Type: v1.PodReady, Status: v1.ConditionTrue, }, }, }, } } func makeInitContainerPods() *v1.Pod { return &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "testpod", Namespace: "default", UID: types.UID("abc123"), }, Spec: v1.PodSpec{ NodeName: "testnode", Containers: []v1.Container{ { Name: "testcontainer", Ports: []v1.ContainerPort{ { Name: "testport", Protocol: v1.ProtocolTCP, ContainerPort: int32(9000), }, }, }, }, InitContainers: []v1.Container{ { Name: "initcontainer", }, }, }, Status: v1.PodStatus{ PodIP: "1.2.3.4", HostIP: "2.3.4.5", Phase: "Pending", Conditions: []v1.PodCondition{ { Type: v1.PodReady, Status: v1.ConditionFalse, }, }, }, } } func expectedPodTargetGroups(ns string) map[string]*targetgroup.Group { key := fmt.Sprintf("pod/%s/testpod", ns) return map[string]*targetgroup.Group{ key: { Targets: []model.LabelSet{ { "__address__": "1.2.3.4:9000", "__meta_kubernetes_pod_container_name": "testcontainer", "__meta_kubernetes_pod_container_port_name": "testport", "__meta_kubernetes_pod_container_port_number": "9000", "__meta_kubernetes_pod_container_port_protocol": "TCP", "__meta_kubernetes_pod_container_init": "false", }, }, Labels: model.LabelSet{ "__meta_kubernetes_pod_name": "testpod", "__meta_kubernetes_namespace": lv(ns), "__meta_kubernetes_pod_node_name": "testnode", "__meta_kubernetes_pod_ip": "1.2.3.4", "__meta_kubernetes_pod_host_ip": "2.3.4.5", "__meta_kubernetes_pod_ready": "true", "__meta_kubernetes_pod_phase": "Running", "__meta_kubernetes_pod_uid": "abc123", }, Source: key, }, } } func TestPodDiscoveryBeforeRun(t *testing.T) { n, c := makeDiscovery(RolePod, NamespaceDiscovery{}) k8sDiscoveryTest{ discovery: n, beforeRun: func() { obj := makeMultiPortPods() c.CoreV1().Pods(obj.Namespace).Create(obj) }, expectedMaxItems: 1, expectedRes: map[string]*targetgroup.Group{ "pod/default/testpod": { Targets: []model.LabelSet{ { "__address__": 
"1.2.3.4:9000", "__meta_kubernetes_pod_container_name": "testcontainer0", "__meta_kubernetes_pod_container_port_name": "testport0", "__meta_kubernetes_pod_container_port_number": "9000", "__meta_kubernetes_pod_container_port_protocol": "TCP", "__meta_kubernetes_pod_container_init": "false", }, { "__address__": "1.2.3.4:9001", "__meta_kubernetes_pod_container_name": "testcontainer0", "__meta_kubernetes_pod_container_port_name": "testport1", "__meta_kubernetes_pod_container_port_number": "9001", "__meta_kubernetes_pod_container_port_protocol": "UDP", "__meta_kubernetes_pod_container_init": "false", }, { "__address__": "1.2.3.4", "__meta_kubernetes_pod_container_name": "testcontainer1", "__meta_kubernetes_pod_container_init": "false", }, }, Labels: model.LabelSet{ "__meta_kubernetes_pod_name": "testpod", "__meta_kubernetes_namespace": "default", "__meta_kubernetes_pod_label_test_label": "testvalue", "__meta_kubernetes_pod_labelpresent_test_label": "true", "__meta_kubernetes_pod_annotation_test_annotation": "testannotationvalue", "__meta_kubernetes_pod_annotationpresent_test_annotation": "true", "__meta_kubernetes_pod_node_name": "testnode", "__meta_kubernetes_pod_ip": "1.2.3.4", "__meta_kubernetes_pod_host_ip": "2.3.4.5", "__meta_kubernetes_pod_ready": "true", "__meta_kubernetes_pod_phase": "Running", "__meta_kubernetes_pod_uid": "abc123", "__meta_kubernetes_pod_controller_kind": "testcontrollerkind", "__meta_kubernetes_pod_controller_name": "testcontrollername", }, Source: "pod/default/testpod", }, }, }.Run(t) } func TestPodDiscoveryInitContainer(t *testing.T) { n, c := makeDiscovery(RolePod, NamespaceDiscovery{}) ns := "default" key := fmt.Sprintf("pod/%s/testpod", ns) expected := expectedPodTargetGroups(ns) expected[key].Targets = append(expected[key].Targets, model.LabelSet{ "__address__": "1.2.3.4", "__meta_kubernetes_pod_container_name": "initcontainer", "__meta_kubernetes_pod_container_init": "true", }) expected[key].Labels["__meta_kubernetes_pod_phase"] = "Pending" expected[key].Labels["__meta_kubernetes_pod_ready"] = "false" k8sDiscoveryTest{ discovery: n, beforeRun: func() { obj := makeInitContainerPods() c.CoreV1().Pods(obj.Namespace).Create(obj) }, expectedMaxItems: 1, expectedRes: expected, }.Run(t) } func TestPodDiscoveryAdd(t *testing.T) { n, c := makeDiscovery(RolePod, NamespaceDiscovery{}) k8sDiscoveryTest{ discovery: n, afterStart: func() { obj := makePods() c.CoreV1().Pods(obj.Namespace).Create(obj) }, expectedMaxItems: 1, expectedRes: expectedPodTargetGroups("default"), }.Run(t) } func TestPodDiscoveryDelete(t *testing.T) { obj := makePods() n, c := makeDiscovery(RolePod, NamespaceDiscovery{}, obj) k8sDiscoveryTest{ discovery: n, afterStart: func() { obj := makePods() c.CoreV1().Pods(obj.Namespace).Delete(obj.Name, &metav1.DeleteOptions{}) }, expectedMaxItems: 2, expectedRes: map[string]*targetgroup.Group{ "pod/default/testpod": { Source: "pod/default/testpod", }, }, }.Run(t) } func TestPodDiscoveryUpdate(t *testing.T) { obj := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "testpod", Namespace: "default", UID: "xyz321", }, Spec: v1.PodSpec{ NodeName: "testnode", Containers: []v1.Container{ { Name: "testcontainer", Ports: []v1.ContainerPort{ { Name: "testport", Protocol: v1.ProtocolTCP, ContainerPort: int32(9000), }, }, }, }, }, Status: v1.PodStatus{ PodIP: "1.2.3.4", HostIP: "2.3.4.5", }, } n, c := makeDiscovery(RolePod, NamespaceDiscovery{}, obj) k8sDiscoveryTest{ discovery: n, afterStart: func() { obj := makePods() c.CoreV1().Pods(obj.Namespace).Update(obj) }, 
expectedMaxItems: 2, expectedRes: expectedPodTargetGroups("default"), }.Run(t) } func TestPodDiscoveryUpdateEmptyPodIP(t *testing.T) { n, c := makeDiscovery(RolePod, NamespaceDiscovery{}) initialPod := makePods() updatedPod := makePods() updatedPod.Status.PodIP = "" k8sDiscoveryTest{ discovery: n, beforeRun: func() { c.CoreV1().Pods(initialPod.Namespace).Create(initialPod) }, afterStart: func() { c.CoreV1().Pods(updatedPod.Namespace).Update(updatedPod) }, expectedMaxItems: 2, expectedRes: map[string]*targetgroup.Group{ "pod/default/testpod": { Source: "pod/default/testpod", }, }, }.Run(t) } func TestPodDiscoveryNamespaces(t *testing.T) { n, c := makeDiscovery(RolePod, NamespaceDiscovery{Names: []string{"ns1", "ns2"}}) expected := expectedPodTargetGroups("ns1") for k, v := range expectedPodTargetGroups("ns2") { expected[k] = v } k8sDiscoveryTest{ discovery: n, beforeRun: func() { for _, ns := range []string{"ns1", "ns2"} { pod := makePods() pod.Namespace = ns c.CoreV1().Pods(pod.Namespace).Create(pod) } }, expectedMaxItems: 2, expectedRes: expected, }.Run(t) } prometheus-2.15.2+ds/discovery/kubernetes/service.go000066400000000000000000000131651360540074000225500ustar00rootroot00000000000000// Copyright 2016 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package kubernetes import ( "context" "net" "strconv" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" "github.com/pkg/errors" "github.com/prometheus/common/model" apiv1 "k8s.io/api/core/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/util/strutil" ) // Service implements discovery of Kubernetes services. type Service struct { logger log.Logger informer cache.SharedInformer store cache.Store queue *workqueue.Type } // NewService returns a new service discovery. func NewService(l log.Logger, inf cache.SharedInformer) *Service { if l == nil { l = log.NewNopLogger() } s := &Service{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("service")} s.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(o interface{}) { eventCount.WithLabelValues("service", "add").Inc() s.enqueue(o) }, DeleteFunc: func(o interface{}) { eventCount.WithLabelValues("service", "delete").Inc() s.enqueue(o) }, UpdateFunc: func(_, o interface{}) { eventCount.WithLabelValues("service", "update").Inc() s.enqueue(o) }, }) return s } func (s *Service) enqueue(obj interface{}) { key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) if err != nil { return } s.queue.Add(key) } // Run implements the Discoverer interface. 
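// It waits for the service informer's cache to sync, then processes queued
// object keys in a background goroutine, emitting one target group per
// service, until the context is canceled.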
func (s *Service) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { defer s.queue.ShutDown() if !cache.WaitForCacheSync(ctx.Done(), s.informer.HasSynced) { if ctx.Err() != context.Canceled { level.Error(s.logger).Log("msg", "service informer unable to sync cache") } return } go func() { for s.process(ctx, ch) { } }() // Block until the target provider is explicitly canceled. <-ctx.Done() } func (s *Service) process(ctx context.Context, ch chan<- []*targetgroup.Group) bool { keyObj, quit := s.queue.Get() if quit { return false } defer s.queue.Done(keyObj) key := keyObj.(string) namespace, name, err := cache.SplitMetaNamespaceKey(key) if err != nil { return true } o, exists, err := s.store.GetByKey(key) if err != nil { return true } if !exists { send(ctx, s.logger, RoleService, ch, &targetgroup.Group{Source: serviceSourceFromNamespaceAndName(namespace, name)}) return true } eps, err := convertToService(o) if err != nil { level.Error(s.logger).Log("msg", "converting to Service object failed", "err", err) return true } send(ctx, s.logger, RoleService, ch, s.buildService(eps)) return true } func convertToService(o interface{}) (*apiv1.Service, error) { service, ok := o.(*apiv1.Service) if ok { return service, nil } return nil, errors.Errorf("received unexpected object: %v", o) } func serviceSource(s *apiv1.Service) string { return serviceSourceFromNamespaceAndName(s.Namespace, s.Name) } func serviceSourceFromNamespaceAndName(namespace, name string) string { return "svc/" + namespace + "/" + name } const ( serviceNameLabel = metaLabelPrefix + "service_name" serviceLabelPrefix = metaLabelPrefix + "service_label_" serviceLabelPresentPrefix = metaLabelPrefix + "service_labelpresent_" serviceAnnotationPrefix = metaLabelPrefix + "service_annotation_" serviceAnnotationPresentPrefix = metaLabelPrefix + "service_annotationpresent_" servicePortNameLabel = metaLabelPrefix + "service_port_name" servicePortProtocolLabel = metaLabelPrefix + "service_port_protocol" serviceClusterIPLabel = metaLabelPrefix + "service_cluster_ip" serviceExternalNameLabel = metaLabelPrefix + "service_external_name" ) func serviceLabels(svc *apiv1.Service) model.LabelSet { // Each label and annotation will create two key-value pairs in the map. 
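// (the value plus its *_labelpresent_ or *_annotationpresent_ marker); the
// two extra slots hold the service name and namespace labels, so the capacity
// below is exact.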
ls := make(model.LabelSet, 2*(len(svc.Labels)+len(svc.Annotations))+2) ls[serviceNameLabel] = lv(svc.Name) ls[namespaceLabel] = lv(svc.Namespace) for k, v := range svc.Labels { ln := strutil.SanitizeLabelName(k) ls[model.LabelName(serviceLabelPrefix+ln)] = lv(v) ls[model.LabelName(serviceLabelPresentPrefix+ln)] = presentValue } for k, v := range svc.Annotations { ln := strutil.SanitizeLabelName(k) ls[model.LabelName(serviceAnnotationPrefix+ln)] = lv(v) ls[model.LabelName(serviceAnnotationPresentPrefix+ln)] = presentValue } return ls } func (s *Service) buildService(svc *apiv1.Service) *targetgroup.Group { tg := &targetgroup.Group{ Source: serviceSource(svc), } tg.Labels = serviceLabels(svc) for _, port := range svc.Spec.Ports { addr := net.JoinHostPort(svc.Name+"."+svc.Namespace+".svc", strconv.FormatInt(int64(port.Port), 10)) labelSet := model.LabelSet{ model.AddressLabel: lv(addr), servicePortNameLabel: lv(port.Name), servicePortProtocolLabel: lv(string(port.Protocol)), } if svc.Spec.Type == apiv1.ServiceTypeExternalName { labelSet[serviceExternalNameLabel] = lv(svc.Spec.ExternalName) } else { labelSet[serviceClusterIPLabel] = lv(svc.Spec.ClusterIP) } tg.Targets = append(tg.Targets, labelSet) } return tg } prometheus-2.15.2+ds/discovery/kubernetes/service_test.go000066400000000000000000000161561360540074000236120ustar00rootroot00000000000000// Copyright 2016 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package kubernetes import ( "fmt" "testing" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/targetgroup" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) func makeMultiPortService() *v1.Service { return &v1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: "testservice", Namespace: "default", Labels: map[string]string{"test-label": "testvalue"}, Annotations: map[string]string{"test-annotation": "testannotationvalue"}, }, Spec: v1.ServiceSpec{ Ports: []v1.ServicePort{ { Name: "testport0", Protocol: v1.ProtocolTCP, Port: int32(30900), }, { Name: "testport1", Protocol: v1.ProtocolUDP, Port: int32(30901), }, }, Type: v1.ServiceTypeClusterIP, ClusterIP: "10.0.0.1", }, } } func makeSuffixedService(suffix string) *v1.Service { return &v1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: fmt.Sprintf("testservice%s", suffix), Namespace: "default", }, Spec: v1.ServiceSpec{ Ports: []v1.ServicePort{ { Name: "testport", Protocol: v1.ProtocolTCP, Port: int32(30900), }, }, Type: v1.ServiceTypeClusterIP, ClusterIP: "10.0.0.1", }, } } func makeService() *v1.Service { return makeSuffixedService("") } func makeExternalService() *v1.Service { return &v1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: "testservice-external", Namespace: "default", }, Spec: v1.ServiceSpec{ Ports: []v1.ServicePort{ { Name: "testport", Protocol: v1.ProtocolTCP, Port: int32(31900), }, }, Type: v1.ServiceTypeExternalName, ExternalName: "FooExternalName", }, } } func TestServiceDiscoveryAdd(t *testing.T) { n, c := makeDiscovery(RoleService, NamespaceDiscovery{}) k8sDiscoveryTest{ discovery: n, afterStart: func() { obj := makeService() c.CoreV1().Services(obj.Namespace).Create(obj) obj = makeExternalService() c.CoreV1().Services(obj.Namespace).Create(obj) }, expectedMaxItems: 2, expectedRes: map[string]*targetgroup.Group{ "svc/default/testservice": { Targets: []model.LabelSet{ { "__meta_kubernetes_service_port_protocol": "TCP", "__address__": "testservice.default.svc:30900", "__meta_kubernetes_service_cluster_ip": "10.0.0.1", "__meta_kubernetes_service_port_name": "testport", }, }, Labels: model.LabelSet{ "__meta_kubernetes_service_name": "testservice", "__meta_kubernetes_namespace": "default", }, Source: "svc/default/testservice", }, "svc/default/testservice-external": { Targets: []model.LabelSet{ { "__meta_kubernetes_service_port_protocol": "TCP", "__address__": "testservice-external.default.svc:31900", "__meta_kubernetes_service_port_name": "testport", "__meta_kubernetes_service_external_name": "FooExternalName", }, }, Labels: model.LabelSet{ "__meta_kubernetes_service_name": "testservice-external", "__meta_kubernetes_namespace": "default", }, Source: "svc/default/testservice-external", }, }, }.Run(t) } func TestServiceDiscoveryDelete(t *testing.T) { n, c := makeDiscovery(RoleService, NamespaceDiscovery{}, makeService()) k8sDiscoveryTest{ discovery: n, afterStart: func() { obj := makeService() c.CoreV1().Services(obj.Namespace).Delete(obj.Name, &metav1.DeleteOptions{}) }, expectedMaxItems: 2, expectedRes: map[string]*targetgroup.Group{ "svc/default/testservice": { Source: "svc/default/testservice", }, }, }.Run(t) } func TestServiceDiscoveryUpdate(t *testing.T) { n, c := makeDiscovery(RoleService, NamespaceDiscovery{}, makeService()) k8sDiscoveryTest{ discovery: n, afterStart: func() { obj := makeMultiPortService() c.CoreV1().Services(obj.Namespace).Update(obj) }, expectedMaxItems: 2, expectedRes: map[string]*targetgroup.Group{ "svc/default/testservice": { Targets: []model.LabelSet{ { 
"__meta_kubernetes_service_port_protocol": "TCP", "__address__": "testservice.default.svc:30900", "__meta_kubernetes_service_cluster_ip": "10.0.0.1", "__meta_kubernetes_service_port_name": "testport0", }, { "__meta_kubernetes_service_port_protocol": "UDP", "__address__": "testservice.default.svc:30901", "__meta_kubernetes_service_cluster_ip": "10.0.0.1", "__meta_kubernetes_service_port_name": "testport1", }, }, Labels: model.LabelSet{ "__meta_kubernetes_service_name": "testservice", "__meta_kubernetes_namespace": "default", "__meta_kubernetes_service_label_test_label": "testvalue", "__meta_kubernetes_service_labelpresent_test_label": "true", "__meta_kubernetes_service_annotation_test_annotation": "testannotationvalue", "__meta_kubernetes_service_annotationpresent_test_annotation": "true", }, Source: "svc/default/testservice", }, }, }.Run(t) } func TestServiceDiscoveryNamespaces(t *testing.T) { n, c := makeDiscovery(RoleService, NamespaceDiscovery{Names: []string{"ns1", "ns2"}}) k8sDiscoveryTest{ discovery: n, afterStart: func() { for _, ns := range []string{"ns1", "ns2"} { obj := makeService() obj.Namespace = ns c.CoreV1().Services(obj.Namespace).Create(obj) } }, expectedMaxItems: 2, expectedRes: map[string]*targetgroup.Group{ "svc/ns1/testservice": { Targets: []model.LabelSet{ { "__meta_kubernetes_service_port_protocol": "TCP", "__address__": "testservice.ns1.svc:30900", "__meta_kubernetes_service_cluster_ip": "10.0.0.1", "__meta_kubernetes_service_port_name": "testport", }, }, Labels: model.LabelSet{ "__meta_kubernetes_service_name": "testservice", "__meta_kubernetes_namespace": "ns1", }, Source: "svc/ns1/testservice", }, "svc/ns2/testservice": { Targets: []model.LabelSet{ { "__meta_kubernetes_service_port_protocol": "TCP", "__address__": "testservice.ns2.svc:30900", "__meta_kubernetes_service_cluster_ip": "10.0.0.1", "__meta_kubernetes_service_port_name": "testport", }, }, Labels: model.LabelSet{ "__meta_kubernetes_service_name": "testservice", "__meta_kubernetes_namespace": "ns2", }, Source: "svc/ns2/testservice", }, }, }.Run(t) } prometheus-2.15.2+ds/discovery/manager.go000066400000000000000000000321121360540074000203440ustar00rootroot00000000000000// Copyright 2016 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package discovery import ( "context" "fmt" "reflect" "sync" "time" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" "github.com/prometheus/client_golang/prometheus" sd_config "github.com/prometheus/prometheus/discovery/config" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/azure" "github.com/prometheus/prometheus/discovery/consul" "github.com/prometheus/prometheus/discovery/dns" "github.com/prometheus/prometheus/discovery/ec2" "github.com/prometheus/prometheus/discovery/file" "github.com/prometheus/prometheus/discovery/gce" "github.com/prometheus/prometheus/discovery/kubernetes" "github.com/prometheus/prometheus/discovery/marathon" "github.com/prometheus/prometheus/discovery/openstack" "github.com/prometheus/prometheus/discovery/triton" "github.com/prometheus/prometheus/discovery/zookeeper" ) var ( failedConfigs = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "prometheus_sd_failed_configs", Help: "Current number of service discovery configurations that failed to load.", }, []string{"name"}, ) discoveredTargets = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "prometheus_sd_discovered_targets", Help: "Current number of discovered targets.", }, []string{"name", "config"}, ) receivedUpdates = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "prometheus_sd_received_updates_total", Help: "Total number of update events received from the SD providers.", }, []string{"name"}, ) delayedUpdates = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "prometheus_sd_updates_delayed_total", Help: "Total number of update events that couldn't be sent immediately.", }, []string{"name"}, ) sentUpdates = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "prometheus_sd_updates_total", Help: "Total number of update events sent to the SD consumers.", }, []string{"name"}, ) ) func init() { prometheus.MustRegister(failedConfigs, discoveredTargets, receivedUpdates, delayedUpdates, sentUpdates) } // Discoverer provides information about target groups. It maintains a set // of sources from which TargetGroups can originate. Whenever a discovery provider // detects a potential change, it sends the TargetGroup through its channel. // // Discoverer does not know if an actual change happened. // It does guarantee that it sends the new TargetGroup whenever a change happens. // // Discoverers should initially send a full set of all discoverable TargetGroups. type Discoverer interface { // Run hands a channel to the discovery provider (Consul, DNS etc) through which it can send // updated target groups. // Must returns if the context gets canceled. It should not close the update // channel on returning. Run(ctx context.Context, up chan<- []*targetgroup.Group) } type poolKey struct { setName string provider string } // provider holds a Discoverer instance, its configuration and its subscribers. type provider struct { name string d Discoverer subs []string config interface{} } // NewManager is the Discovery Manager constructor. func NewManager(ctx context.Context, logger log.Logger, options ...func(*Manager)) *Manager { if logger == nil { logger = log.NewNopLogger() } mgr := &Manager{ logger: logger, syncCh: make(chan map[string][]*targetgroup.Group), targets: make(map[poolKey]map[string]*targetgroup.Group), discoverCancel: []context.CancelFunc{}, ctx: ctx, updatert: 5 * time.Second, triggerSend: make(chan struct{}, 1), } for _, option := range options { option(mgr) } return mgr } // Name sets the name of the manager. 
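// It is meant to be passed as a functional option to NewManager and ends up
// as the "name" label on the discovery metrics, e.g. (ctx and logger assumed
// to be in scope):
//
//	mgr := NewManager(ctx, logger, Name("scrape"))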
func Name(n string) func(*Manager) { return func(m *Manager) { m.mtx.Lock() defer m.mtx.Unlock() m.name = n } } // Manager maintains a set of discovery providers and sends each update to a map channel. // Targets are grouped by the target set name. type Manager struct { logger log.Logger name string mtx sync.RWMutex ctx context.Context discoverCancel []context.CancelFunc // Some Discoverers(eg. k8s) send only the updates for a given target group // so we use map[tg.Source]*targetgroup.Group to know which group to update. targets map[poolKey]map[string]*targetgroup.Group // providers keeps track of SD providers. providers []*provider // The sync channel sends the updates as a map where the key is the job value from the scrape config. syncCh chan map[string][]*targetgroup.Group // How long to wait before sending updates to the channel. The variable // should only be modified in unit tests. updatert time.Duration // The triggerSend channel signals to the manager that new updates have been received from providers. triggerSend chan struct{} } // Run starts the background processing func (m *Manager) Run() error { go m.sender() for range m.ctx.Done() { m.cancelDiscoverers() return m.ctx.Err() } return nil } // SyncCh returns a read only channel used by all the clients to receive target updates. func (m *Manager) SyncCh() <-chan map[string][]*targetgroup.Group { return m.syncCh } // ApplyConfig removes all running discovery providers and starts new ones using the provided config. func (m *Manager) ApplyConfig(cfg map[string]sd_config.ServiceDiscoveryConfig) error { m.mtx.Lock() defer m.mtx.Unlock() for pk := range m.targets { if _, ok := cfg[pk.setName]; !ok { discoveredTargets.DeleteLabelValues(m.name, pk.setName) } } m.cancelDiscoverers() m.targets = make(map[poolKey]map[string]*targetgroup.Group) m.providers = nil m.discoverCancel = nil failedCount := 0 for name, scfg := range cfg { failedCount += m.registerProviders(scfg, name) discoveredTargets.WithLabelValues(m.name, name).Set(0) } failedConfigs.WithLabelValues(m.name).Set(float64(failedCount)) for _, prov := range m.providers { m.startProvider(m.ctx, prov) } return nil } // StartCustomProvider is used for sdtool. Only use this if you know what you're doing. func (m *Manager) StartCustomProvider(ctx context.Context, name string, worker Discoverer) { p := &provider{ name: name, d: worker, subs: []string{name}, } m.providers = append(m.providers, p) m.startProvider(ctx, p) } func (m *Manager) startProvider(ctx context.Context, p *provider) { level.Debug(m.logger).Log("msg", "Starting provider", "provider", p.name, "subs", fmt.Sprintf("%v", p.subs)) ctx, cancel := context.WithCancel(ctx) updates := make(chan []*targetgroup.Group) m.discoverCancel = append(m.discoverCancel, cancel) go p.d.Run(ctx, updates) go m.updater(ctx, p, updates) } func (m *Manager) updater(ctx context.Context, p *provider, updates chan []*targetgroup.Group) { for { select { case <-ctx.Done(): return case tgs, ok := <-updates: receivedUpdates.WithLabelValues(m.name).Inc() if !ok { level.Debug(m.logger).Log("msg", "discoverer channel closed", "provider", p.name) return } for _, s := range p.subs { m.updateGroup(poolKey{setName: s, provider: p.name}, tgs) } select { case m.triggerSend <- struct{}{}: default: } } } } func (m *Manager) sender() { ticker := time.NewTicker(m.updatert) defer ticker.Stop() for { select { case <-m.ctx.Done(): return case <-ticker.C: // Some discoverers send updates too often so we throttle these with the ticker. 
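// On each tick we drain triggerSend and attempt a non-blocking send of the
// merged groups; if the receiver is not ready, the update is counted as
// delayed and triggerSend is re-armed so the send is retried on a later tick.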
select { case <-m.triggerSend: sentUpdates.WithLabelValues(m.name).Inc() select { case m.syncCh <- m.allGroups(): default: delayedUpdates.WithLabelValues(m.name).Inc() level.Debug(m.logger).Log("msg", "discovery receiver's channel was full so will retry the next cycle") select { case m.triggerSend <- struct{}{}: default: } } default: } } } } func (m *Manager) cancelDiscoverers() { for _, c := range m.discoverCancel { c() } } func (m *Manager) updateGroup(poolKey poolKey, tgs []*targetgroup.Group) { m.mtx.Lock() defer m.mtx.Unlock() for _, tg := range tgs { if tg != nil { // Some Discoverers send nil target group so need to check for it to avoid panics. if _, ok := m.targets[poolKey]; !ok { m.targets[poolKey] = make(map[string]*targetgroup.Group) } m.targets[poolKey][tg.Source] = tg } } } func (m *Manager) allGroups() map[string][]*targetgroup.Group { m.mtx.Lock() defer m.mtx.Unlock() tSets := map[string][]*targetgroup.Group{} for pkey, tsets := range m.targets { var n int for _, tg := range tsets { // Even if the target group 'tg' is empty we still need to send it to the 'Scrape manager' // to signal that it needs to stop all scrape loops for this target set. tSets[pkey.setName] = append(tSets[pkey.setName], tg) n += len(tg.Targets) } discoveredTargets.WithLabelValues(m.name, pkey.setName).Set(float64(n)) } return tSets } // registerProviders returns a number of failed SD config. func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setName string) int { var ( failedCount int added bool ) add := func(cfg interface{}, newDiscoverer func() (Discoverer, error)) { t := reflect.TypeOf(cfg).String() for _, p := range m.providers { if reflect.DeepEqual(cfg, p.config) { p.subs = append(p.subs, setName) added = true return } } d, err := newDiscoverer() if err != nil { level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", t) failedCount++ return } provider := provider{ name: fmt.Sprintf("%s/%d", t, len(m.providers)), d: d, config: cfg, subs: []string{setName}, } m.providers = append(m.providers, &provider) added = true } for _, c := range cfg.DNSSDConfigs { add(c, func() (Discoverer, error) { return dns.NewDiscovery(*c, log.With(m.logger, "discovery", "dns")), nil }) } for _, c := range cfg.FileSDConfigs { add(c, func() (Discoverer, error) { return file.NewDiscovery(c, log.With(m.logger, "discovery", "file")), nil }) } for _, c := range cfg.ConsulSDConfigs { add(c, func() (Discoverer, error) { return consul.NewDiscovery(c, log.With(m.logger, "discovery", "consul")) }) } for _, c := range cfg.MarathonSDConfigs { add(c, func() (Discoverer, error) { return marathon.NewDiscovery(*c, log.With(m.logger, "discovery", "marathon")) }) } for _, c := range cfg.KubernetesSDConfigs { add(c, func() (Discoverer, error) { return kubernetes.New(log.With(m.logger, "discovery", "k8s"), c) }) } for _, c := range cfg.ServersetSDConfigs { add(c, func() (Discoverer, error) { return zookeeper.NewServersetDiscovery(c, log.With(m.logger, "discovery", "zookeeper")) }) } for _, c := range cfg.NerveSDConfigs { add(c, func() (Discoverer, error) { return zookeeper.NewNerveDiscovery(c, log.With(m.logger, "discovery", "nerve")) }) } for _, c := range cfg.EC2SDConfigs { add(c, func() (Discoverer, error) { return ec2.NewDiscovery(c, log.With(m.logger, "discovery", "ec2")), nil }) } for _, c := range cfg.OpenstackSDConfigs { add(c, func() (Discoverer, error) { return openstack.NewDiscovery(c, log.With(m.logger, "discovery", "openstack")) }) } for _, c := range cfg.GCESDConfigs { 
add(c, func() (Discoverer, error) { return gce.NewDiscovery(*c, log.With(m.logger, "discovery", "gce")) }) } for _, c := range cfg.AzureSDConfigs { add(c, func() (Discoverer, error) { return azure.NewDiscovery(c, log.With(m.logger, "discovery", "azure")), nil }) } for _, c := range cfg.TritonSDConfigs { add(c, func() (Discoverer, error) { return triton.New(log.With(m.logger, "discovery", "triton"), c) }) } if len(cfg.StaticConfigs) > 0 { add(setName, func() (Discoverer, error) { return &StaticProvider{TargetGroups: cfg.StaticConfigs}, nil }) } if !added { // Add an empty target group to force the refresh of the corresponding // scrape pool and to notify the receiver that this target set has no // current targets. // It can happen because the combined set of SD configurations is empty // or because we fail to instantiate all the SD configurations. add(setName, func() (Discoverer, error) { return &StaticProvider{TargetGroups: []*targetgroup.Group{{}}}, nil }) } return failedCount } // StaticProvider holds a list of target groups that never change. type StaticProvider struct { TargetGroups []*targetgroup.Group } // Run implements the Worker interface. func (sd *StaticProvider) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { // We still have to consider that the consumer exits right away in which case // the context will be canceled. select { case ch <- sd.TargetGroups: case <-ctx.Done(): } close(ch) } prometheus-2.15.2+ds/discovery/manager_test.go000066400000000000000000000736761360540074000214270ustar00rootroot00000000000000// Copyright 2016 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package discovery import ( "context" "fmt" "io/ioutil" "os" "reflect" "sort" "strconv" "testing" "time" "github.com/go-kit/kit/log" "github.com/prometheus/client_golang/prometheus/testutil" common_config "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/config" sd_config "github.com/prometheus/prometheus/discovery/config" "github.com/prometheus/prometheus/discovery/consul" "github.com/prometheus/prometheus/discovery/file" "github.com/prometheus/prometheus/discovery/targetgroup" "gopkg.in/yaml.v2" ) // TestTargetUpdatesOrder checks that the target updates are received in the expected order. func TestTargetUpdatesOrder(t *testing.T) { // The order by which the updates are send is determined by the interval passed to the mock discovery adapter // Final targets array is ordered alphabetically by the name of the discoverer. // For example discoverer "A" with targets "t2,t3" and discoverer "B" with targets "t1,t2" will result in "t2,t3,t1,t2" after the merge. 
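// Each entry in expectedTargets is therefore the full, merged target list the
// manager is expected to emit after the corresponding update round.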
testCases := []struct { title string updates map[string][]update expectedTargets [][]*targetgroup.Group }{ { title: "Single TP no updates", updates: map[string][]update{ "tp1": {}, }, expectedTargets: nil, }, { title: "Multiple TPs no updates", updates: map[string][]update{ "tp1": {}, "tp2": {}, "tp3": {}, }, expectedTargets: nil, }, { title: "Single TP empty initials", updates: map[string][]update{ "tp1": { { targetGroups: []targetgroup.Group{}, interval: 5 * time.Millisecond, }, }, }, expectedTargets: [][]*targetgroup.Group{ {}, }, }, { title: "Multiple TPs empty initials", updates: map[string][]update{ "tp1": { { targetGroups: []targetgroup.Group{}, interval: 5 * time.Millisecond, }, }, "tp2": { { targetGroups: []targetgroup.Group{}, interval: 200 * time.Millisecond, }, }, "tp3": { { targetGroups: []targetgroup.Group{}, interval: 100 * time.Millisecond, }, }, }, expectedTargets: [][]*targetgroup.Group{ {}, {}, {}, }, }, { title: "Single TP initials only", updates: map[string][]update{ "tp1": { { targetGroups: []targetgroup.Group{ { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "2"}}, }}, }, }, }, expectedTargets: [][]*targetgroup.Group{ { { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, }, }, }, { title: "Multiple TPs initials only", updates: map[string][]update{ "tp1": { { targetGroups: []targetgroup.Group{ { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, }, }, }, "tp2": { { targetGroups: []targetgroup.Group{ { Source: "tp2_group1", Targets: []model.LabelSet{{"__instance__": "3"}}, }, }, interval: 10 * time.Millisecond, }, }, }, expectedTargets: [][]*targetgroup.Group{ { { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, }, { { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, { Source: "tp2_group1", Targets: []model.LabelSet{{"__instance__": "3"}}, }, }, }, }, { title: "Single TP initials followed by empty updates", updates: map[string][]update{ "tp1": { { targetGroups: []targetgroup.Group{ { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, }, interval: 0, }, { targetGroups: []targetgroup.Group{ { Source: "tp1_group1", Targets: []model.LabelSet{}, }, { Source: "tp1_group2", Targets: []model.LabelSet{}, }, }, interval: 10 * time.Millisecond, }, }, }, expectedTargets: [][]*targetgroup.Group{ { { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, }, { { Source: "tp1_group1", Targets: []model.LabelSet{}, }, { Source: "tp1_group2", Targets: []model.LabelSet{}, }, }, }, }, { title: "Single TP initials and new groups", updates: map[string][]update{ "tp1": { { targetGroups: []targetgroup.Group{ { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, }, interval: 0, }, { targetGroups: []targetgroup.Group{ { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "3"}}, }, { Source: 
"tp1_group2", Targets: []model.LabelSet{{"__instance__": "4"}}, }, { Source: "tp1_group3", Targets: []model.LabelSet{{"__instance__": "1"}}, }, }, interval: 10 * time.Millisecond, }, }, }, expectedTargets: [][]*targetgroup.Group{ { { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, }, { { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "3"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "4"}}, }, { Source: "tp1_group3", Targets: []model.LabelSet{{"__instance__": "1"}}, }, }, }, }, { title: "Multiple TPs initials and new groups", updates: map[string][]update{ "tp1": { { targetGroups: []targetgroup.Group{ { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, }, interval: 10 * time.Millisecond, }, { targetGroups: []targetgroup.Group{ { Source: "tp1_group3", Targets: []model.LabelSet{{"__instance__": "3"}}, }, { Source: "tp1_group4", Targets: []model.LabelSet{{"__instance__": "4"}}, }, }, interval: 500 * time.Millisecond, }, }, "tp2": { { targetGroups: []targetgroup.Group{ { Source: "tp2_group1", Targets: []model.LabelSet{{"__instance__": "5"}}, }, { Source: "tp2_group2", Targets: []model.LabelSet{{"__instance__": "6"}}, }, }, interval: 100 * time.Millisecond, }, { targetGroups: []targetgroup.Group{ { Source: "tp2_group3", Targets: []model.LabelSet{{"__instance__": "7"}}, }, { Source: "tp2_group4", Targets: []model.LabelSet{{"__instance__": "8"}}, }, }, interval: 10 * time.Millisecond, }, }, }, expectedTargets: [][]*targetgroup.Group{ { { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, }, { { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, { Source: "tp2_group1", Targets: []model.LabelSet{{"__instance__": "5"}}, }, { Source: "tp2_group2", Targets: []model.LabelSet{{"__instance__": "6"}}, }, }, { { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, { Source: "tp2_group1", Targets: []model.LabelSet{{"__instance__": "5"}}, }, { Source: "tp2_group2", Targets: []model.LabelSet{{"__instance__": "6"}}, }, { Source: "tp2_group3", Targets: []model.LabelSet{{"__instance__": "7"}}, }, { Source: "tp2_group4", Targets: []model.LabelSet{{"__instance__": "8"}}, }, }, { { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, { Source: "tp1_group3", Targets: []model.LabelSet{{"__instance__": "3"}}, }, { Source: "tp1_group4", Targets: []model.LabelSet{{"__instance__": "4"}}, }, { Source: "tp2_group1", Targets: []model.LabelSet{{"__instance__": "5"}}, }, { Source: "tp2_group2", Targets: []model.LabelSet{{"__instance__": "6"}}, }, { Source: "tp2_group3", Targets: []model.LabelSet{{"__instance__": "7"}}, }, { Source: "tp2_group4", Targets: []model.LabelSet{{"__instance__": "8"}}, }, }, }, }, { title: "One TP initials arrive after other TP updates.", updates: map[string][]update{ "tp1": { { targetGroups: []targetgroup.Group{ { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, 
}, interval: 10 * time.Millisecond, }, { targetGroups: []targetgroup.Group{ { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "3"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "4"}}, }, }, interval: 150 * time.Millisecond, }, }, "tp2": { { targetGroups: []targetgroup.Group{ { Source: "tp2_group1", Targets: []model.LabelSet{{"__instance__": "5"}}, }, { Source: "tp2_group2", Targets: []model.LabelSet{{"__instance__": "6"}}, }, }, interval: 200 * time.Millisecond, }, { targetGroups: []targetgroup.Group{ { Source: "tp2_group1", Targets: []model.LabelSet{{"__instance__": "7"}}, }, { Source: "tp2_group2", Targets: []model.LabelSet{{"__instance__": "8"}}, }, }, interval: 100 * time.Millisecond, }, }, }, expectedTargets: [][]*targetgroup.Group{ { { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, }, { { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "3"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "4"}}, }, }, { { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "3"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "4"}}, }, { Source: "tp2_group1", Targets: []model.LabelSet{{"__instance__": "5"}}, }, { Source: "tp2_group2", Targets: []model.LabelSet{{"__instance__": "6"}}, }, }, { { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "3"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "4"}}, }, { Source: "tp2_group1", Targets: []model.LabelSet{{"__instance__": "7"}}, }, { Source: "tp2_group2", Targets: []model.LabelSet{{"__instance__": "8"}}, }, }, }, }, { title: "Single TP empty update in between", updates: map[string][]update{ "tp1": { { targetGroups: []targetgroup.Group{ { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, }, interval: 30 * time.Millisecond, }, { targetGroups: []targetgroup.Group{ { Source: "tp1_group1", Targets: []model.LabelSet{}, }, { Source: "tp1_group2", Targets: []model.LabelSet{}, }, }, interval: 10 * time.Millisecond, }, { targetGroups: []targetgroup.Group{ { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "3"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "4"}}, }, }, interval: 300 * time.Millisecond, }, }, }, expectedTargets: [][]*targetgroup.Group{ { { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, }, { { Source: "tp1_group1", Targets: []model.LabelSet{}, }, { Source: "tp1_group2", Targets: []model.LabelSet{}, }, }, { { Source: "tp1_group1", Targets: []model.LabelSet{{"__instance__": "3"}}, }, { Source: "tp1_group2", Targets: []model.LabelSet{{"__instance__": "4"}}, }, }, }, }, } for i, tc := range testCases { tc := tc t.Run(tc.title, func(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() discoveryManager := NewManager(ctx, log.NewNopLogger()) discoveryManager.updatert = 100 * time.Millisecond var totalUpdatesCount int provUpdates := make(chan []*targetgroup.Group) for _, up := range tc.updates { go newMockDiscoveryProvider(up...).Run(ctx, provUpdates) if len(up) > 0 { totalUpdatesCount = totalUpdatesCount + len(up) } } Loop: for x := 0; x < totalUpdatesCount; x++ { select { case <-ctx.Done(): 
t.Errorf("%d: no update arrived within the timeout limit", x) break Loop case tgs := <-provUpdates: discoveryManager.updateGroup(poolKey{setName: strconv.Itoa(i), provider: tc.title}, tgs) for _, got := range discoveryManager.allGroups() { assertEqualGroups(t, got, tc.expectedTargets[x], func(got, expected string) string { return fmt.Sprintf("%d: \ntargets mismatch \ngot: %v \nexpected: %v", x, got, expected) }) } } } }) } } func assertEqualGroups(t *testing.T, got, expected []*targetgroup.Group, msg func(got, expected string) string) { t.Helper() format := func(groups []*targetgroup.Group) string { var s string for i, group := range groups { if i > 0 { s += "," } s += group.Source + ":" + fmt.Sprint(group.Targets) } return s } // Need to sort by the groups's source as the received order is not guaranteed. sort.Sort(byGroupSource(got)) sort.Sort(byGroupSource(expected)) if !reflect.DeepEqual(got, expected) { t.Errorf(msg(format(got), format(expected))) } } func verifyPresence(t *testing.T, tSets map[poolKey]map[string]*targetgroup.Group, poolKey poolKey, label string, present bool) { t.Helper() if _, ok := tSets[poolKey]; !ok { t.Fatalf("'%s' should be present in Pool keys: %v", poolKey, tSets) return } match := false var mergedTargets string for _, targetGroup := range tSets[poolKey] { for _, l := range targetGroup.Targets { mergedTargets = mergedTargets + " " + l.String() if l.String() == label { match = true } } } if match != present { msg := "" if !present { msg = "not" } t.Fatalf("%q should %s be present in Targets labels: %q", label, msg, mergedTargets) } } func TestTargetSetRecreatesTargetGroupsEveryRun(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() discoveryManager := NewManager(ctx, log.NewNopLogger()) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() c := map[string]sd_config.ServiceDiscoveryConfig{ "prometheus": sd_config.ServiceDiscoveryConfig{ StaticConfigs: []*targetgroup.Group{ &targetgroup.Group{ Source: "0", Targets: []model.LabelSet{ model.LabelSet{ model.AddressLabel: model.LabelValue("foo:9090"), }, }, }, &targetgroup.Group{ Source: "1", Targets: []model.LabelSet{ model.LabelSet{ model.AddressLabel: model.LabelValue("bar:9090"), }, }, }, }, }, } discoveryManager.ApplyConfig(c) <-discoveryManager.SyncCh() verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "string/0"}, "{__address__=\"foo:9090\"}", true) verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "string/0"}, "{__address__=\"bar:9090\"}", true) c["prometheus"] = sd_config.ServiceDiscoveryConfig{ StaticConfigs: []*targetgroup.Group{ &targetgroup.Group{ Source: "0", Targets: []model.LabelSet{ model.LabelSet{ model.AddressLabel: model.LabelValue("foo:9090"), }, }, }, }, } discoveryManager.ApplyConfig(c) <-discoveryManager.SyncCh() verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "string/0"}, "{__address__=\"foo:9090\"}", true) verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "string/0"}, "{__address__=\"bar:9090\"}", false) } // TestTargetSetRecreatesEmptyStaticConfigs ensures that reloading a config file after // removing all targets from the static_configs sends an update with empty targetGroups. // This is required to signal the receiver that this target set has no current targets. 
func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() discoveryManager := NewManager(ctx, log.NewNopLogger()) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() c := map[string]sd_config.ServiceDiscoveryConfig{ "prometheus": sd_config.ServiceDiscoveryConfig{ StaticConfigs: []*targetgroup.Group{ &targetgroup.Group{ Source: "0", Targets: []model.LabelSet{ model.LabelSet{ model.AddressLabel: model.LabelValue("foo:9090"), }, }, }, }, }, } discoveryManager.ApplyConfig(c) <-discoveryManager.SyncCh() verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "string/0"}, "{__address__=\"foo:9090\"}", true) c["prometheus"] = sd_config.ServiceDiscoveryConfig{ StaticConfigs: []*targetgroup.Group{}, } discoveryManager.ApplyConfig(c) <-discoveryManager.SyncCh() pkey := poolKey{setName: "prometheus", provider: "string/0"} targetGroups, ok := discoveryManager.targets[pkey] if !ok { t.Fatalf("'%v' should be present in target groups", pkey) } group, ok := targetGroups[""] if !ok { t.Fatalf("missing '' key in target groups %v", targetGroups) } if len(group.Targets) != 0 { t.Fatalf("Invalid number of targets: expected 0, got %d", len(group.Targets)) } } func TestIdenticalConfigurationsAreCoalesced(t *testing.T) { tmpFile, err := ioutil.TempFile("", "sd") if err != nil { t.Fatalf("error creating temporary file: %v", err) } defer os.Remove(tmpFile.Name()) if _, err := tmpFile.Write([]byte(`[{"targets": ["foo:9090"]}]`)); err != nil { t.Fatalf("error writing temporary file: %v", err) } if err := tmpFile.Close(); err != nil { t.Fatalf("error closing temporary file: %v", err) } tmpFile2 := fmt.Sprintf("%s.json", tmpFile.Name()) if err = os.Link(tmpFile.Name(), tmpFile2); err != nil { t.Fatalf("error linking temporary file: %v", err) } defer os.Remove(tmpFile2) ctx, cancel := context.WithCancel(context.Background()) defer cancel() discoveryManager := NewManager(ctx, nil) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() c := map[string]sd_config.ServiceDiscoveryConfig{ "prometheus": sd_config.ServiceDiscoveryConfig{ FileSDConfigs: []*file.SDConfig{ &file.SDConfig{ Files: []string{ tmpFile2, }, RefreshInterval: file.DefaultSDConfig.RefreshInterval, }, }, }, "prometheus2": sd_config.ServiceDiscoveryConfig{ FileSDConfigs: []*file.SDConfig{ &file.SDConfig{ Files: []string{ tmpFile2, }, RefreshInterval: file.DefaultSDConfig.RefreshInterval, }, }, }, } discoveryManager.ApplyConfig(c) <-discoveryManager.SyncCh() verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "*file.SDConfig/0"}, "{__address__=\"foo:9090\"}", true) verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus2", provider: "*file.SDConfig/0"}, "{__address__=\"foo:9090\"}", true) if len(discoveryManager.providers) != 1 { t.Fatalf("Invalid number of providers: expected 1, got %d", len(discoveryManager.providers)) } } func TestApplyConfigDoesNotModifyStaticProviderTargets(t *testing.T) { cfgText := ` scrape_configs: - job_name: 'prometheus' static_configs: - targets: ["foo:9090"] - targets: ["bar:9090"] - targets: ["baz:9090"] ` originalConfig := &config.Config{} if err := yaml.UnmarshalStrict([]byte(cfgText), originalConfig); err != nil { t.Fatalf("Unable to load YAML config cfgYaml: %s", err) } processedConfig := &config.Config{} if err := yaml.UnmarshalStrict([]byte(cfgText), processedConfig); err != nil { t.Fatalf("Unable to load YAML config cfgYaml: 
%s", err) } ctx, cancel := context.WithCancel(context.Background()) defer cancel() discoveryManager := NewManager(ctx, log.NewNopLogger()) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() c := map[string]sd_config.ServiceDiscoveryConfig{ "prometheus": processedConfig.ScrapeConfigs[0].ServiceDiscoveryConfig, } discoveryManager.ApplyConfig(c) <-discoveryManager.SyncCh() origSdcfg := originalConfig.ScrapeConfigs[0].ServiceDiscoveryConfig for _, sdcfg := range c { if !reflect.DeepEqual(origSdcfg.StaticConfigs, sdcfg.StaticConfigs) { t.Fatalf("discovery manager modified static config \n expected: %v\n got: %v\n", origSdcfg.StaticConfigs, sdcfg.StaticConfigs) } } } func TestGaugeFailedConfigs(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() discoveryManager := NewManager(ctx, log.NewNopLogger()) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() c := map[string]sd_config.ServiceDiscoveryConfig{ "prometheus": sd_config.ServiceDiscoveryConfig{ ConsulSDConfigs: []*consul.SDConfig{ &consul.SDConfig{ Server: "foo:8500", TLSConfig: common_config.TLSConfig{ CertFile: "/tmp/non_existent", }, }, &consul.SDConfig{ Server: "bar:8500", TLSConfig: common_config.TLSConfig{ CertFile: "/tmp/non_existent", }, }, &consul.SDConfig{ Server: "foo2:8500", TLSConfig: common_config.TLSConfig{ CertFile: "/tmp/non_existent", }, }, }, }, } discoveryManager.ApplyConfig(c) <-discoveryManager.SyncCh() failedCount := testutil.ToFloat64(failedConfigs) if failedCount != 3 { t.Fatalf("Expected to have 3 failed configs, got: %v", failedCount) } c["prometheus"] = sd_config.ServiceDiscoveryConfig{ StaticConfigs: []*targetgroup.Group{ &targetgroup.Group{ Source: "0", Targets: []model.LabelSet{ model.LabelSet{ model.AddressLabel: "foo:9090", }, }, }, }, } discoveryManager.ApplyConfig(c) <-discoveryManager.SyncCh() failedCount = testutil.ToFloat64(failedConfigs) if failedCount != 0 { t.Fatalf("Expected to get no failed config, got: %v", failedCount) } } func TestCoordinationWithReceiver(t *testing.T) { updateDelay := 100 * time.Millisecond type expect struct { delay time.Duration tgs map[string][]*targetgroup.Group } testCases := []struct { title string providers map[string]Discoverer expected []expect }{ { title: "Receiver should get all updates even when one provider closes its channel", providers: map[string]Discoverer{ "once1": &onceProvider{ tgs: []*targetgroup.Group{ { Source: "tg1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, }, }, "mock1": newMockDiscoveryProvider( update{ interval: 2 * updateDelay, targetGroups: []targetgroup.Group{ { Source: "tg2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, }, }, ), }, expected: []expect{ { tgs: map[string][]*targetgroup.Group{ "once1": { { Source: "tg1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, }, }, }, { tgs: map[string][]*targetgroup.Group{ "once1": { { Source: "tg1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, }, "mock1": { { Source: "tg2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, }, }, }, }, }, { title: "Receiver should get all updates even when the channel is blocked", providers: map[string]Discoverer{ "mock1": newMockDiscoveryProvider( update{ targetGroups: []targetgroup.Group{ { Source: "tg1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, }, }, update{ interval: 4 * updateDelay, targetGroups: []targetgroup.Group{ { Source: "tg2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, }, }, ), }, expected: []expect{ { delay: 2 * 
updateDelay, tgs: map[string][]*targetgroup.Group{ "mock1": { { Source: "tg1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, }, }, }, { delay: 4 * updateDelay, tgs: map[string][]*targetgroup.Group{ "mock1": { { Source: "tg1", Targets: []model.LabelSet{{"__instance__": "1"}}, }, { Source: "tg2", Targets: []model.LabelSet{{"__instance__": "2"}}, }, }, }, }, }, }, } for _, tc := range testCases { tc := tc t.Run(tc.title, func(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() mgr := NewManager(ctx, nil) mgr.updatert = updateDelay go mgr.Run() for name, p := range tc.providers { mgr.StartCustomProvider(ctx, name, p) } for i, expected := range tc.expected { time.Sleep(expected.delay) select { case <-ctx.Done(): t.Fatalf("step %d: no update received in the expected timeframe", i) case tgs, ok := <-mgr.SyncCh(): if !ok { t.Fatalf("step %d: discovery manager channel is closed", i) } if len(tgs) != len(expected.tgs) { t.Fatalf("step %d: target groups mismatch, got: %d, expected: %d\ngot: %#v\nexpected: %#v", i, len(tgs), len(expected.tgs), tgs, expected.tgs) } for k := range expected.tgs { if _, ok := tgs[k]; !ok { t.Fatalf("step %d: target group not found: %s\ngot: %#v", i, k, tgs) } assertEqualGroups(t, tgs[k], expected.tgs[k], func(got, expected string) string { return fmt.Sprintf("step %d: targets mismatch \ngot: %q \nexpected: %q", i, got, expected) }) } } } }) } } type update struct { targetGroups []targetgroup.Group interval time.Duration } type mockdiscoveryProvider struct { updates []update } func newMockDiscoveryProvider(updates ...update) mockdiscoveryProvider { tp := mockdiscoveryProvider{ updates: updates, } return tp } func (tp mockdiscoveryProvider) Run(ctx context.Context, upCh chan<- []*targetgroup.Group) { for _, u := range tp.updates { if u.interval > 0 { t := time.NewTicker(u.interval) defer t.Stop() Loop: for { select { case <-ctx.Done(): return case <-t.C: break Loop } } } tgs := make([]*targetgroup.Group, len(u.targetGroups)) for i := range u.targetGroups { tgs[i] = &u.targetGroups[i] } upCh <- tgs } <-ctx.Done() } // byGroupSource implements sort.Interface so we can sort by the Source field. type byGroupSource []*targetgroup.Group func (a byGroupSource) Len() int { return len(a) } func (a byGroupSource) Swap(i, j int) { a[i], a[j] = a[j], a[i] } func (a byGroupSource) Less(i, j int) bool { return a[i].Source < a[j].Source } // onceProvider sends updates once (if any) and closes the update channel. type onceProvider struct { tgs []*targetgroup.Group } func (o onceProvider) Run(_ context.Context, ch chan<- []*targetgroup.Group) { if len(o.tgs) > 0 { ch <- o.tgs } close(ch) } prometheus-2.15.2+ds/discovery/marathon/000077500000000000000000000000001360540074000202155ustar00rootroot00000000000000prometheus-2.15.2+ds/discovery/marathon/marathon.go000066400000000000000000000401441360540074000223600ustar00rootroot00000000000000// Copyright 2016 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package marathon import ( "context" "encoding/json" "fmt" "io" "io/ioutil" "math/rand" "net" "net/http" "strconv" "strings" "time" "github.com/go-kit/kit/log" "github.com/pkg/errors" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/refresh" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/util/strutil" ) const ( // metaLabelPrefix is the meta prefix used for all meta labels in this discovery. metaLabelPrefix = model.MetaLabelPrefix + "marathon_" // appLabelPrefix is the prefix for the application labels. appLabelPrefix = metaLabelPrefix + "app_label_" // appLabel is used for the name of the app in Marathon. appLabel model.LabelName = metaLabelPrefix + "app" // imageLabel is the label that is used for the docker image running the service. imageLabel model.LabelName = metaLabelPrefix + "image" // portIndexLabel is the integer port index when multiple ports are defined; // e.g. PORT1 would have a value of '1' portIndexLabel model.LabelName = metaLabelPrefix + "port_index" // taskLabel contains the mesos task name of the app instance. taskLabel model.LabelName = metaLabelPrefix + "task" // portMappingLabelPrefix is the prefix for the application portMappings labels. portMappingLabelPrefix = metaLabelPrefix + "port_mapping_label_" // portDefinitionLabelPrefix is the prefix for the application portDefinitions labels. portDefinitionLabelPrefix = metaLabelPrefix + "port_definition_label_" ) // DefaultSDConfig is the default Marathon SD configuration. var DefaultSDConfig = SDConfig{ RefreshInterval: model.Duration(30 * time.Second), } // SDConfig is the configuration for services running on Marathon. type SDConfig struct { Servers []string `yaml:"servers,omitempty"` RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` AuthToken config_util.Secret `yaml:"auth_token,omitempty"` AuthTokenFile string `yaml:"auth_token_file,omitempty"` HTTPClientConfig config_util.HTTPClientConfig `yaml:",inline"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = DefaultSDConfig type plain SDConfig err := unmarshal((*plain)(c)) if err != nil { return err } if len(c.Servers) == 0 { return errors.New("marathon_sd: must contain at least one Marathon server") } if len(c.AuthToken) > 0 && len(c.AuthTokenFile) > 0 { return errors.New("marathon_sd: at most one of auth_token & auth_token_file must be configured") } if c.HTTPClientConfig.BasicAuth != nil && (len(c.AuthToken) > 0 || len(c.AuthTokenFile) > 0) { return errors.New("marathon_sd: at most one of basic_auth, auth_token & auth_token_file must be configured") } if (len(c.HTTPClientConfig.BearerToken) > 0 || len(c.HTTPClientConfig.BearerTokenFile) > 0) && (len(c.AuthToken) > 0 || len(c.AuthTokenFile) > 0) { return errors.New("marathon_sd: at most one of bearer_token, bearer_token_file, auth_token & auth_token_file must be configured") } return c.HTTPClientConfig.Validate() } const appListPath string = "/v2/apps/?embed=apps.tasks" // Discovery provides service discovery based on a Marathon instance. type Discovery struct { *refresh.Discovery client *http.Client servers []string lastRefresh map[string]*targetgroup.Group appsClient appListClient } // NewDiscovery returns a new Marathon Discovery. 
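// A minimal construction sketch (the server URL, context and channel names are
// illustrative, not taken from this repository):
//
//	conf := SDConfig{
//		Servers:         []string{"http://marathon.example:8080"},
//		RefreshInterval: model.Duration(30 * time.Second),
//	}
//	d, err := NewDiscovery(conf, log.NewNopLogger())
//	if err != nil {
//		// handle invalid configuration / transport setup errors
//	}
//	go d.Run(ctx, updates) // Run is provided by the embedded refresh.Discovery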
func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) { rt, err := config_util.NewRoundTripperFromConfig(conf.HTTPClientConfig, "marathon_sd", false) if err != nil { return nil, err } if len(conf.AuthToken) > 0 { rt, err = newAuthTokenRoundTripper(conf.AuthToken, rt) } else if len(conf.AuthTokenFile) > 0 { rt, err = newAuthTokenFileRoundTripper(conf.AuthTokenFile, rt) } if err != nil { return nil, err } d := &Discovery{ client: &http.Client{Transport: rt}, servers: conf.Servers, appsClient: fetchApps, } d.Discovery = refresh.NewDiscovery( logger, "marathon", time.Duration(conf.RefreshInterval), d.refresh, ) return d, nil } type authTokenRoundTripper struct { authToken config_util.Secret rt http.RoundTripper } // newAuthTokenRoundTripper adds the provided auth token to a request. func newAuthTokenRoundTripper(token config_util.Secret, rt http.RoundTripper) (http.RoundTripper, error) { return &authTokenRoundTripper{token, rt}, nil } func (rt *authTokenRoundTripper) RoundTrip(request *http.Request) (*http.Response, error) { // According to https://docs.mesosphere.com/1.11/security/oss/managing-authentication/ // DC/OS wants with "token=" a different Authorization header than implemented in httputil/client.go // so we set this explicitly here. request.Header.Set("Authorization", "token="+string(rt.authToken)) return rt.rt.RoundTrip(request) } type authTokenFileRoundTripper struct { authTokenFile string rt http.RoundTripper } // newAuthTokenFileRoundTripper adds the auth token read from the file to a request. func newAuthTokenFileRoundTripper(tokenFile string, rt http.RoundTripper) (http.RoundTripper, error) { // fail-fast if we can't read the file. _, err := ioutil.ReadFile(tokenFile) if err != nil { return nil, errors.Wrapf(err, "unable to read auth token file %s", tokenFile) } return &authTokenFileRoundTripper{tokenFile, rt}, nil } func (rt *authTokenFileRoundTripper) RoundTrip(request *http.Request) (*http.Response, error) { b, err := ioutil.ReadFile(rt.authTokenFile) if err != nil { return nil, errors.Wrapf(err, "unable to read auth token file %s", rt.authTokenFile) } authToken := strings.TrimSpace(string(b)) // According to https://docs.mesosphere.com/1.11/security/oss/managing-authentication/ // DC/OS wants with "token=" a different Authorization header than implemented in httputil/client.go // so we set this explicitly here. request.Header.Set("Authorization", "token="+authToken) return rt.rt.RoundTrip(request) } func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { targetMap, err := d.fetchTargetGroups(ctx) if err != nil { return nil, err } all := make([]*targetgroup.Group, 0, len(targetMap)) for _, tg := range targetMap { all = append(all, tg) } select { case <-ctx.Done(): return nil, ctx.Err() default: } // Remove services which did disappear. for source := range d.lastRefresh { _, ok := targetMap[source] if !ok { all = append(all, &targetgroup.Group{Source: source}) } } d.lastRefresh = targetMap return all, nil } func (d *Discovery) fetchTargetGroups(ctx context.Context) (map[string]*targetgroup.Group, error) { url := randomAppsURL(d.servers) apps, err := d.appsClient(ctx, d.client, url) if err != nil { return nil, err } groups := appsToTargetGroups(apps) return groups, nil } // task describes one instance of a service running on Marathon. 
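// A task is decoded from JSON of roughly this shape (field names follow the
// json struct tags below; the concrete values are illustrative only):
//
//	{
//	  "id": "test-task-1",
//	  "host": "mesos-slave1",
//	  "ports": [31000, 32000],
//	  "ipAddresses": [{"ipAddress": "1.2.3.4", "protocol": "IPv4"}]
//	}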
type task struct { ID string `json:"id"` Host string `json:"host"` Ports []uint32 `json:"ports"` IPAddresses []ipAddress `json:"ipAddresses"` } // ipAddress describes the address and protocol the container's network interface is bound to. type ipAddress struct { Address string `json:"ipAddress"` Proto string `json:"protocol"` } // PortMapping describes in which port the process are binding inside the docker container. type portMapping struct { Labels map[string]string `json:"labels"` ContainerPort uint32 `json:"containerPort"` HostPort uint32 `json:"hostPort"` ServicePort uint32 `json:"servicePort"` } // DockerContainer describes a container which uses the docker runtime. type dockerContainer struct { Image string `json:"image"` PortMappings []portMapping `json:"portMappings"` } // Container describes the runtime an app in running in. type container struct { Docker dockerContainer `json:"docker"` PortMappings []portMapping `json:"portMappings"` } // PortDefinition describes which load balancer port you should access to access the service. type portDefinition struct { Labels map[string]string `json:"labels"` Port uint32 `json:"port"` } // Network describes the name and type of network the container is attached to. type network struct { Name string `json:"name"` Mode string `json:"mode"` } // App describes a service running on Marathon. type app struct { ID string `json:"id"` Tasks []task `json:"tasks"` RunningTasks int `json:"tasksRunning"` Labels map[string]string `json:"labels"` Container container `json:"container"` PortDefinitions []portDefinition `json:"portDefinitions"` Networks []network `json:"networks"` RequirePorts bool `json:"requirePorts"` } // isContainerNet checks if the app's first network is set to mode 'container'. func (app app) isContainerNet() bool { return len(app.Networks) > 0 && app.Networks[0].Mode == "container" } // appList is a list of Marathon apps. type appList struct { Apps []app `json:"apps"` } // appListClient defines a function that can be used to get an application list from marathon. type appListClient func(ctx context.Context, client *http.Client, url string) (*appList, error) // fetchApps requests a list of applications from a marathon server. func fetchApps(ctx context.Context, client *http.Client, url string) (*appList, error) { request, err := http.NewRequest("GET", url, nil) if err != nil { return nil, err } request = request.WithContext(ctx) resp, err := client.Do(request) if err != nil { return nil, err } defer func() { io.Copy(ioutil.Discard, resp.Body) resp.Body.Close() }() if (resp.StatusCode < 200) || (resp.StatusCode >= 300) { return nil, errors.Errorf("non 2xx status '%v' response during marathon service discovery", resp.StatusCode) } var apps appList err = json.NewDecoder(resp.Body).Decode(&apps) if err != nil { return nil, errors.Wrapf(err, "%q", url) } return &apps, nil } // randomAppsURL randomly selects a server from an array and creates // an URL pointing to the app list. func randomAppsURL(servers []string) string { // TODO: If possible update server list from Marathon at some point. server := servers[rand.Intn(len(servers))] return fmt.Sprintf("%s%s", server, appListPath) } // appsToTargetGroups takes an array of Marathon apps and converts them into target groups. 
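// Each app maps to exactly one group: createTargetGroup below uses the app ID as
// the group Source, attaches the app/image meta labels plus any Marathon app
// labels (prefixed with appLabelPrefix), and derives one target per task and port
// via targetsForApp.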
func appsToTargetGroups(apps *appList) map[string]*targetgroup.Group { tgroups := map[string]*targetgroup.Group{} for _, a := range apps.Apps { group := createTargetGroup(&a) tgroups[group.Source] = group } return tgroups } func createTargetGroup(app *app) *targetgroup.Group { var ( targets = targetsForApp(app) appName = model.LabelValue(app.ID) image = model.LabelValue(app.Container.Docker.Image) ) tg := &targetgroup.Group{ Targets: targets, Labels: model.LabelSet{ appLabel: appName, imageLabel: image, }, Source: app.ID, } for ln, lv := range app.Labels { ln = appLabelPrefix + strutil.SanitizeLabelName(ln) tg.Labels[model.LabelName(ln)] = model.LabelValue(lv) } return tg } func targetsForApp(app *app) []model.LabelSet { targets := make([]model.LabelSet, 0, len(app.Tasks)) var ports []uint32 var labels []map[string]string var prefix string if len(app.Container.PortMappings) != 0 { // In Marathon 1.5.x the "container.docker.portMappings" object was moved // to "container.portMappings". ports, labels = extractPortMapping(app.Container.PortMappings, app.isContainerNet()) prefix = portMappingLabelPrefix } else if len(app.Container.Docker.PortMappings) != 0 { // Prior to Marathon 1.5 the port mappings could be found at the path // "container.docker.portMappings". ports, labels = extractPortMapping(app.Container.Docker.PortMappings, app.isContainerNet()) prefix = portMappingLabelPrefix } else if len(app.PortDefinitions) != 0 { // PortDefinitions deprecates the "ports" array and can be used to specify // a list of ports with metadata in case a mapping is not required. ports = make([]uint32, len(app.PortDefinitions)) labels = make([]map[string]string, len(app.PortDefinitions)) for i := 0; i < len(app.PortDefinitions); i++ { labels[i] = app.PortDefinitions[i].Labels // When requirePorts is false, this port becomes the 'servicePort', not the listen port. // In this case, the port needs to be taken from the task instead of the app. if app.RequirePorts { ports[i] = app.PortDefinitions[i].Port } } prefix = portDefinitionLabelPrefix } // Gather info about the app's 'tasks'. Each instance (container) is considered a task // and can be reachable at one or more host:port endpoints. for _, t := range app.Tasks { // There are no labels to gather if only Ports is defined. (eg. with host networking) // Ports can only be gathered from the Task (not from the app) and are guaranteed // to be the same across all tasks. If we haven't gathered any ports by now, // use the task's ports as the port list. if len(ports) == 0 && len(t.Ports) != 0 { ports = t.Ports } // Iterate over the ports we gathered using one of the methods above. for i, port := range ports { // A zero port here means that either the portMapping has a zero port defined, // or there is a portDefinition with requirePorts set to false. This means the port // is auto-generated by Mesos and needs to be looked up in the task. if port == 0 && len(t.Ports) == len(ports) { port = t.Ports[i] } // Each port represents a possible Prometheus target. targetAddress := targetEndpoint(&t, port, app.isContainerNet()) target := model.LabelSet{ model.AddressLabel: model.LabelValue(targetAddress), taskLabel: model.LabelValue(t.ID), portIndexLabel: model.LabelValue(strconv.Itoa(i)), } // Gather all port labels and set them on the current target, skip if the port has no Marathon labels. // This will happen in the host networking case with only `ports` defined, where // it is inefficient to allocate a list of possibly hundreds of empty label maps per host port. 
if len(labels) > 0 { for ln, lv := range labels[i] { ln = prefix + strutil.SanitizeLabelName(ln) target[model.LabelName(ln)] = model.LabelValue(lv) } } targets = append(targets, target) } } return targets } // Generate a target endpoint string in host:port format. func targetEndpoint(task *task, port uint32, containerNet bool) string { var host string // Use the task's ipAddress field when it's in a container network if containerNet && len(task.IPAddresses) > 0 { host = task.IPAddresses[0].Address } else { host = task.Host } return net.JoinHostPort(host, fmt.Sprintf("%d", port)) } // Get a list of ports and a list of labels from a PortMapping. func extractPortMapping(portMappings []portMapping, containerNet bool) ([]uint32, []map[string]string) { ports := make([]uint32, len(portMappings)) labels := make([]map[string]string, len(portMappings)) for i := 0; i < len(portMappings); i++ { labels[i] = portMappings[i].Labels if containerNet { // If the app is in a container network, connect directly to the container port. ports[i] = portMappings[i].ContainerPort } else { // Otherwise, connect to the allocated host port for the container. // Note that this host port is likely set to 0 in the app definition, which means it is // automatically generated and needs to be extracted from the task's 'ports' array at a later stage. ports[i] = portMappings[i].HostPort } } return ports, labels } prometheus-2.15.2+ds/discovery/marathon/marathon_test.go000066400000000000000000000522631360540074000234240ustar00rootroot00000000000000// Copyright 2015 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package marathon import ( "context" "errors" "io" "net/http" "net/http/httptest" "testing" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/targetgroup" ) var ( marathonValidLabel = map[string]string{"prometheus": "yes"} testServers = []string{"http://localhost:8080"} conf = SDConfig{Servers: testServers} ) func testUpdateServices(client appListClient) ([]*targetgroup.Group, error) { md, err := NewDiscovery(conf, nil) if err != nil { return nil, err } if client != nil { md.appsClient = client } return md.refresh(context.Background()) } func TestMarathonSDHandleError(t *testing.T) { var ( errTesting = errors.New("testing failure") client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) { return nil, errTesting } ) tgs, err := testUpdateServices(client) if err != errTesting { t.Fatalf("Expected error: %s", err) } if len(tgs) != 0 { t.Fatalf("Got group: %s", tgs) } } func TestMarathonSDEmptyList(t *testing.T) { var ( client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) { return &appList{}, nil } ) tgs, err := testUpdateServices(client) if err != nil { t.Fatalf("Got error: %s", err) } if len(tgs) > 0 { t.Fatalf("Got group: %v", tgs) } } func marathonTestAppList(labels map[string]string, runningTasks int) *appList { var ( t = task{ ID: "test-task-1", Host: "mesos-slave1", } docker = dockerContainer{ Image: "repo/image:tag", } portMappings = []portMapping{ {Labels: labels, HostPort: 31000}, } container = container{Docker: docker, PortMappings: portMappings} a = app{ ID: "test-service", Tasks: []task{t}, RunningTasks: runningTasks, Labels: labels, Container: container, } ) return &appList{ Apps: []app{a}, } } func TestMarathonSDSendGroup(t *testing.T) { var ( client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) { return marathonTestAppList(marathonValidLabel, 1), nil } ) tgs, err := testUpdateServices(client) if err != nil { t.Fatalf("Got error: %s", err) } if len(tgs) != 1 { t.Fatal("Expected 1 target group, got", len(tgs)) } tg := tgs[0] if tg.Source != "test-service" { t.Fatalf("Wrong target group name: %s", tg.Source) } if len(tg.Targets) != 1 { t.Fatalf("Wrong number of targets: %v", tg.Targets) } tgt := tg.Targets[0] if tgt[model.AddressLabel] != "mesos-slave1:31000" { t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "yes" { t.Fatalf("Wrong first portMappings label from the first port: %s", tgt[model.AddressLabel]) } } func TestMarathonSDRemoveApp(t *testing.T) { md, err := NewDiscovery(conf, nil) if err != nil { t.Fatalf("%s", err) } md.appsClient = func(_ context.Context, _ *http.Client, _ string) (*appList, error) { return marathonTestAppList(marathonValidLabel, 1), nil } tgs, err := md.refresh(context.Background()) if err != nil { t.Fatalf("Got error on first update: %s", err) } if len(tgs) != 1 { t.Fatal("Expected 1 targetgroup, got", len(tgs)) } tg1 := tgs[0] md.appsClient = func(_ context.Context, _ *http.Client, _ string) (*appList, error) { return marathonTestAppList(marathonValidLabel, 0), nil } tgs, err = md.refresh(context.Background()) if err != nil { t.Fatalf("Got error on second update: %s", err) } if len(tgs) != 1 { t.Fatal("Expected 1 targetgroup, got", len(tgs)) } tg2 := tgs[0] if tg2.Source != tg1.Source { t.Fatalf("Source is different: %s != %s", tg1.Source, tg2.Source) if len(tg2.Targets) > 0 { t.Fatalf("Got a non-empty target set: %s", tg2.Targets) } } } func 
marathonTestAppListWithMultiplePorts(labels map[string]string, runningTasks int) *appList { var ( t = task{ ID: "test-task-1", Host: "mesos-slave1", } docker = dockerContainer{ Image: "repo/image:tag", } portMappings = []portMapping{ {Labels: labels, HostPort: 31000}, {Labels: make(map[string]string), HostPort: 32000}, } container = container{Docker: docker, PortMappings: portMappings} a = app{ ID: "test-service", Tasks: []task{t}, RunningTasks: runningTasks, Labels: labels, Container: container, } ) return &appList{ Apps: []app{a}, } } func TestMarathonSDSendGroupWithMultiplePort(t *testing.T) { var ( client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) { return marathonTestAppListWithMultiplePorts(marathonValidLabel, 1), nil } ) tgs, err := testUpdateServices(client) if err != nil { t.Fatalf("Got error: %s", err) } if len(tgs) != 1 { t.Fatal("Expected 1 target group, got", len(tgs)) } tg := tgs[0] if tg.Source != "test-service" { t.Fatalf("Wrong target group name: %s", tg.Source) } if len(tg.Targets) != 2 { t.Fatalf("Wrong number of targets: %v", tg.Targets) } tgt := tg.Targets[0] if tgt[model.AddressLabel] != "mesos-slave1:31000" { t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "yes" { t.Fatalf("Wrong first portMappings label from the first port: %s", tgt[model.AddressLabel]) } tgt = tg.Targets[1] if tgt[model.AddressLabel] != "mesos-slave1:32000" { t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "" { t.Fatalf("Wrong portMappings label from the second port: %s", tgt[model.AddressLabel]) } } func marathonTestZeroTaskPortAppList(labels map[string]string, runningTasks int) *appList { var ( t = task{ ID: "test-task-2", Host: "mesos-slave-2", Ports: []uint32{}, } docker = dockerContainer{Image: "repo/image:tag"} container = container{Docker: docker} a = app{ ID: "test-service-zero-ports", Tasks: []task{t}, RunningTasks: runningTasks, Labels: labels, Container: container, } ) return &appList{ Apps: []app{a}, } } func TestMarathonZeroTaskPorts(t *testing.T) { var ( client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) { return marathonTestZeroTaskPortAppList(marathonValidLabel, 1), nil } ) tgs, err := testUpdateServices(client) if err != nil { t.Fatalf("Got error: %s", err) } if len(tgs) != 1 { t.Fatal("Expected 1 target group, got", len(tgs)) } tg := tgs[0] if tg.Source != "test-service-zero-ports" { t.Fatalf("Wrong target group name: %s", tg.Source) } if len(tg.Targets) != 0 { t.Fatalf("Wrong number of targets: %v", tg.Targets) } } func Test500ErrorHttpResponseWithValidJSONBody(t *testing.T) { // Simulate 500 error with a valid JSON response. respHandler := func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusInternalServerError) w.Header().Set("Content-Type", "application/json") io.WriteString(w, `{}`) } // Create a test server with mock HTTP handler. ts := httptest.NewServer(http.HandlerFunc(respHandler)) defer ts.Close() // Backup conf for future tests. backupConf := conf defer func() { conf = backupConf }() // Setup conf for the test case. conf = SDConfig{Servers: []string{ts.URL}} // Execute test case and validate behavior. 
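// Passing nil leaves the Discovery's default fetchApps client in place, so the
// refresh issues a real HTTP request against the httptest server above and
// exercises the non-2xx status check.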
_, err := testUpdateServices(nil) if err == nil { t.Fatalf("Expected error for 5xx HTTP response from marathon server, got nil") } } func marathonTestAppListWithPortDefinitions(labels map[string]string, runningTasks int) *appList { var ( t = task{ ID: "test-task-1", Host: "mesos-slave1", // Auto-generated ports when requirePorts is false Ports: []uint32{1234, 5678}, } docker = dockerContainer{ Image: "repo/image:tag", } container = container{Docker: docker} a = app{ ID: "test-service", Tasks: []task{t}, RunningTasks: runningTasks, Labels: labels, Container: container, PortDefinitions: []portDefinition{ {Labels: make(map[string]string), Port: 31000}, {Labels: labels, Port: 32000}, }, RequirePorts: false, // default } ) return &appList{ Apps: []app{a}, } } func TestMarathonSDSendGroupWithPortDefinitions(t *testing.T) { var ( client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) { return marathonTestAppListWithPortDefinitions(marathonValidLabel, 1), nil } ) tgs, err := testUpdateServices(client) if err != nil { t.Fatalf("Got error: %s", err) } if len(tgs) != 1 { t.Fatal("Expected 1 target group, got", len(tgs)) } tg := tgs[0] if tg.Source != "test-service" { t.Fatalf("Wrong target group name: %s", tg.Source) } if len(tg.Targets) != 2 { t.Fatalf("Wrong number of targets: %v", tg.Targets) } tgt := tg.Targets[0] if tgt[model.AddressLabel] != "mesos-slave1:1234" { t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "" { t.Fatalf("Wrong first portMappings label from the first port: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "" { t.Fatalf("Wrong first portDefinitions label from the first port: %s", tgt[model.AddressLabel]) } tgt = tg.Targets[1] if tgt[model.AddressLabel] != "mesos-slave1:5678" { t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "" { t.Fatalf("Wrong portMappings label from the second port: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "yes" { t.Fatalf("Wrong portDefinitions label from the second port: %s", tgt[model.AddressLabel]) } } func marathonTestAppListWithPortDefinitionsRequirePorts(labels map[string]string, runningTasks int) *appList { var ( t = task{ ID: "test-task-1", Host: "mesos-slave1", Ports: []uint32{31000, 32000}, } docker = dockerContainer{ Image: "repo/image:tag", } container = container{Docker: docker} a = app{ ID: "test-service", Tasks: []task{t}, RunningTasks: runningTasks, Labels: labels, Container: container, PortDefinitions: []portDefinition{ {Labels: make(map[string]string), Port: 31000}, {Labels: labels, Port: 32000}, }, RequirePorts: true, } ) return &appList{ Apps: []app{a}, } } func TestMarathonSDSendGroupWithPortDefinitionsRequirePorts(t *testing.T) { var ( client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) { return marathonTestAppListWithPortDefinitionsRequirePorts(marathonValidLabel, 1), nil } ) tgs, err := testUpdateServices(client) if err != nil { t.Fatalf("Got error: %s", err) } if len(tgs) != 1 { t.Fatal("Expected 1 target group, got", len(tgs)) } tg := tgs[0] if tg.Source != "test-service" { t.Fatalf("Wrong target group name: %s", tg.Source) } if len(tg.Targets) != 2 { t.Fatalf("Wrong number of targets: %v", tg.Targets) } tgt := tg.Targets[0] if tgt[model.AddressLabel] != "mesos-slave1:31000" { t.Fatalf("Wrong target address: %s", 
tgt[model.AddressLabel]) } if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "" { t.Fatalf("Wrong first portMappings label from the first port: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "" { t.Fatalf("Wrong first portDefinitions label from the first port: %s", tgt[model.AddressLabel]) } tgt = tg.Targets[1] if tgt[model.AddressLabel] != "mesos-slave1:32000" { t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "" { t.Fatalf("Wrong portMappings label from the second port: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "yes" { t.Fatalf("Wrong portDefinitions label from the second port: %s", tgt[model.AddressLabel]) } } func marathonTestAppListWithPorts(labels map[string]string, runningTasks int) *appList { var ( t = task{ ID: "test-task-1", Host: "mesos-slave1", Ports: []uint32{31000, 32000}, } docker = dockerContainer{ Image: "repo/image:tag", } container = container{Docker: docker} a = app{ ID: "test-service", Tasks: []task{t}, RunningTasks: runningTasks, Labels: labels, Container: container, } ) return &appList{ Apps: []app{a}, } } func TestMarathonSDSendGroupWithPorts(t *testing.T) { var ( client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) { return marathonTestAppListWithPorts(marathonValidLabel, 1), nil } ) tgs, err := testUpdateServices(client) if err != nil { t.Fatalf("Got error: %s", err) } if len(tgs) != 1 { t.Fatal("Expected 1 target group, got", len(tgs)) } tg := tgs[0] if tg.Source != "test-service" { t.Fatalf("Wrong target group name: %s", tg.Source) } if len(tg.Targets) != 2 { t.Fatalf("Wrong number of targets: %v", tg.Targets) } tgt := tg.Targets[0] if tgt[model.AddressLabel] != "mesos-slave1:31000" { t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "" { t.Fatalf("Wrong first portMappings label from the first port: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "" { t.Fatalf("Wrong first portDefinitions label from the first port: %s", tgt[model.AddressLabel]) } tgt = tg.Targets[1] if tgt[model.AddressLabel] != "mesos-slave1:32000" { t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "" { t.Fatalf("Wrong portMappings label from the second port: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "" { t.Fatalf("Wrong portDefinitions label from the second port: %s", tgt[model.AddressLabel]) } } func marathonTestAppListWithContainerPortMappings(labels map[string]string, runningTasks int) *appList { var ( t = task{ ID: "test-task-1", Host: "mesos-slave1", Ports: []uint32{ 12345, // 'Automatically-generated' port 32000, }, } docker = dockerContainer{ Image: "repo/image:tag", } container = container{ Docker: docker, PortMappings: []portMapping{ {Labels: labels, HostPort: 0}, {Labels: make(map[string]string), HostPort: 32000}, }, } a = app{ ID: "test-service", Tasks: []task{t}, RunningTasks: runningTasks, Labels: labels, Container: container, } ) return &appList{ Apps: []app{a}, } } func TestMarathonSDSendGroupWithContainerPortMappings(t *testing.T) { var ( client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) { return marathonTestAppListWithContainerPortMappings(marathonValidLabel, 1), nil } 
) tgs, err := testUpdateServices(client) if err != nil { t.Fatalf("Got error: %s", err) } if len(tgs) != 1 { t.Fatal("Expected 1 target group, got", len(tgs)) } tg := tgs[0] if tg.Source != "test-service" { t.Fatalf("Wrong target group name: %s", tg.Source) } if len(tg.Targets) != 2 { t.Fatalf("Wrong number of targets: %v", tg.Targets) } tgt := tg.Targets[0] if tgt[model.AddressLabel] != "mesos-slave1:12345" { t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "yes" { t.Fatalf("Wrong first portMappings label from the first port: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "" { t.Fatalf("Wrong first portDefinitions label from the first port: %s", tgt[model.AddressLabel]) } tgt = tg.Targets[1] if tgt[model.AddressLabel] != "mesos-slave1:32000" { t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "" { t.Fatalf("Wrong portMappings label from the second port: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "" { t.Fatalf("Wrong portDefinitions label from the second port: %s", tgt[model.AddressLabel]) } } func marathonTestAppListWithDockerContainerPortMappings(labels map[string]string, runningTasks int) *appList { var ( t = task{ ID: "test-task-1", Host: "mesos-slave1", Ports: []uint32{ 31000, 12345, // 'Automatically-generated' port }, } docker = dockerContainer{ Image: "repo/image:tag", PortMappings: []portMapping{ {Labels: labels, HostPort: 31000}, {Labels: make(map[string]string), HostPort: 0}, }, } container = container{ Docker: docker, } a = app{ ID: "test-service", Tasks: []task{t}, RunningTasks: runningTasks, Labels: labels, Container: container, } ) return &appList{ Apps: []app{a}, } } func TestMarathonSDSendGroupWithDockerContainerPortMappings(t *testing.T) { var ( client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) { return marathonTestAppListWithDockerContainerPortMappings(marathonValidLabel, 1), nil } ) tgs, err := testUpdateServices(client) if err != nil { t.Fatalf("Got error: %s", err) } if len(tgs) != 1 { t.Fatal("Expected 1 target group, got", len(tgs)) } tg := tgs[0] if tg.Source != "test-service" { t.Fatalf("Wrong target group name: %s", tg.Source) } if len(tg.Targets) != 2 { t.Fatalf("Wrong number of targets: %v", tg.Targets) } tgt := tg.Targets[0] if tgt[model.AddressLabel] != "mesos-slave1:31000" { t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "yes" { t.Fatalf("Wrong first portMappings label from the first port: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "" { t.Fatalf("Wrong first portDefinitions label from the first port: %s", tgt[model.AddressLabel]) } tgt = tg.Targets[1] if tgt[model.AddressLabel] != "mesos-slave1:12345" { t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "" { t.Fatalf("Wrong portMappings label from the second port: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "" { t.Fatalf("Wrong portDefinitions label from the second port: %s", tgt[model.AddressLabel]) } } func marathonTestAppListWithContainerNetworkAndPortMappings(labels map[string]string, runningTasks int) *appList { var ( t = task{ ID: "test-task-1", Host: 
"mesos-slave1", IPAddresses: []ipAddress{ {Address: "1.2.3.4"}, }, } docker = dockerContainer{ Image: "repo/image:tag", } portMappings = []portMapping{ {Labels: labels, ContainerPort: 8080, HostPort: 31000}, {Labels: make(map[string]string), ContainerPort: 1234, HostPort: 32000}, } container = container{ Docker: docker, PortMappings: portMappings, } networks = []network{ {Mode: "container", Name: "test-network"}, } a = app{ ID: "test-service", Tasks: []task{t}, RunningTasks: runningTasks, Labels: labels, Container: container, Networks: networks, } ) return &appList{ Apps: []app{a}, } } func TestMarathonSDSendGroupWithContainerNetworkAndPortMapping(t *testing.T) { var ( client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) { return marathonTestAppListWithContainerNetworkAndPortMappings(marathonValidLabel, 1), nil } ) tgs, err := testUpdateServices(client) if err != nil { t.Fatalf("Got error: %s", err) } if len(tgs) != 1 { t.Fatal("Expected 1 target group, got", len(tgs)) } tg := tgs[0] if tg.Source != "test-service" { t.Fatalf("Wrong target group name: %s", tg.Source) } if len(tg.Targets) != 2 { t.Fatalf("Wrong number of targets: %v", tg.Targets) } tgt := tg.Targets[0] if tgt[model.AddressLabel] != "1.2.3.4:8080" { t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "yes" { t.Fatalf("Wrong first portMappings label from the first port: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "" { t.Fatalf("Wrong first portDefinitions label from the first port: %s", tgt[model.AddressLabel]) } tgt = tg.Targets[1] if tgt[model.AddressLabel] != "1.2.3.4:1234" { t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "" { t.Fatalf("Wrong portMappings label from the second port: %s", tgt[model.AddressLabel]) } if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "" { t.Fatalf("Wrong portDefinitions label from the second port: %s", tgt[model.AddressLabel]) } } prometheus-2.15.2+ds/discovery/openstack/000077500000000000000000000000001360540074000203735ustar00rootroot00000000000000prometheus-2.15.2+ds/discovery/openstack/hypervisor.go000066400000000000000000000071111360540074000231340ustar00rootroot00000000000000// Copyright 2017 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package openstack import ( "context" "fmt" "net" "github.com/go-kit/kit/log" "github.com/gophercloud/gophercloud" "github.com/gophercloud/gophercloud/openstack" "github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/hypervisors" "github.com/gophercloud/gophercloud/pagination" "github.com/pkg/errors" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/targetgroup" ) const ( openstackLabelHypervisorHostIP = openstackLabelPrefix + "hypervisor_host_ip" openstackLabelHypervisorHostName = openstackLabelPrefix + "hypervisor_hostname" openstackLabelHypervisorStatus = openstackLabelPrefix + "hypervisor_status" openstackLabelHypervisorState = openstackLabelPrefix + "hypervisor_state" openstackLabelHypervisorType = openstackLabelPrefix + "hypervisor_type" ) // HypervisorDiscovery discovers OpenStack hypervisors. type HypervisorDiscovery struct { provider *gophercloud.ProviderClient authOpts *gophercloud.AuthOptions region string logger log.Logger port int } // newHypervisorDiscovery returns a new hypervisor discovery. func newHypervisorDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions, port int, region string, l log.Logger) *HypervisorDiscovery { return &HypervisorDiscovery{provider: provider, authOpts: opts, region: region, port: port, logger: l} } func (h *HypervisorDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { h.provider.Context = ctx err := openstack.Authenticate(h.provider, *h.authOpts) if err != nil { return nil, errors.Wrap(err, "could not authenticate to OpenStack") } client, err := openstack.NewComputeV2(h.provider, gophercloud.EndpointOpts{ Region: h.region, }) if err != nil { return nil, errors.Wrap(err, "could not create OpenStack compute session") } tg := &targetgroup.Group{ Source: fmt.Sprintf("OS_" + h.region), } // OpenStack API reference // https://developer.openstack.org/api-ref/compute/#list-hypervisors-details pagerHypervisors := hypervisors.List(client) err = pagerHypervisors.EachPage(func(page pagination.Page) (bool, error) { hypervisorList, err := hypervisors.ExtractHypervisors(page) if err != nil { return false, errors.Wrap(err, "could not extract hypervisors") } for _, hypervisor := range hypervisorList { labels := model.LabelSet{} addr := net.JoinHostPort(hypervisor.HostIP, fmt.Sprintf("%d", h.port)) labels[model.AddressLabel] = model.LabelValue(addr) labels[openstackLabelHypervisorHostName] = model.LabelValue(hypervisor.HypervisorHostname) labels[openstackLabelHypervisorHostIP] = model.LabelValue(hypervisor.HostIP) labels[openstackLabelHypervisorStatus] = model.LabelValue(hypervisor.Status) labels[openstackLabelHypervisorState] = model.LabelValue(hypervisor.State) labels[openstackLabelHypervisorType] = model.LabelValue(hypervisor.HypervisorType) tg.Targets = append(tg.Targets, labels) } return true, nil }) if err != nil { return nil, err } return []*targetgroup.Group{tg}, nil } prometheus-2.15.2+ds/discovery/openstack/hypervisor_test.go000066400000000000000000000067451360540074000242070ustar00rootroot00000000000000// Copyright 2017 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package openstack import ( "context" "strings" "testing" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/util/testutil" ) type OpenstackSDHypervisorTestSuite struct { Mock *SDMock } func (s *OpenstackSDHypervisorTestSuite) TearDownSuite() { s.Mock.ShutdownServer() } func (s *OpenstackSDHypervisorTestSuite) SetupTest(t *testing.T) { s.Mock = NewSDMock(t) s.Mock.Setup() s.Mock.HandleHypervisorListSuccessfully() s.Mock.HandleVersionsSuccessfully() s.Mock.HandleAuthSuccessfully() } func (s *OpenstackSDHypervisorTestSuite) openstackAuthSuccess() (refresher, error) { conf := SDConfig{ IdentityEndpoint: s.Mock.Endpoint(), Password: "test", Username: "test", DomainName: "12345", Region: "RegionOne", Role: "hypervisor", } return newRefresher(&conf, nil) } func TestOpenstackSDHypervisorRefresh(t *testing.T) { mock := &OpenstackSDHypervisorTestSuite{} mock.SetupTest(t) hypervisor, _ := mock.openstackAuthSuccess() ctx := context.Background() tgs, err := hypervisor.refresh(ctx) testutil.Equals(t, 1, len(tgs)) tg := tgs[0] testutil.Ok(t, err) testutil.Assert(t, tg != nil, "") testutil.Assert(t, tg.Targets != nil, "") testutil.Assert(t, len(tg.Targets) == 2, "") testutil.Equals(t, tg.Targets[0]["__address__"], model.LabelValue("172.16.70.14:0")) testutil.Equals(t, tg.Targets[0]["__meta_openstack_hypervisor_hostname"], model.LabelValue("nc14.cloud.com")) testutil.Equals(t, tg.Targets[0]["__meta_openstack_hypervisor_type"], model.LabelValue("QEMU")) testutil.Equals(t, tg.Targets[0]["__meta_openstack_hypervisor_host_ip"], model.LabelValue("172.16.70.14")) testutil.Equals(t, tg.Targets[0]["__meta_openstack_hypervisor_state"], model.LabelValue("up")) testutil.Equals(t, tg.Targets[0]["__meta_openstack_hypervisor_status"], model.LabelValue("enabled")) testutil.Equals(t, tg.Targets[1]["__address__"], model.LabelValue("172.16.70.13:0")) testutil.Equals(t, tg.Targets[1]["__meta_openstack_hypervisor_hostname"], model.LabelValue("cc13.cloud.com")) testutil.Equals(t, tg.Targets[1]["__meta_openstack_hypervisor_type"], model.LabelValue("QEMU")) testutil.Equals(t, tg.Targets[1]["__meta_openstack_hypervisor_host_ip"], model.LabelValue("172.16.70.13")) testutil.Equals(t, tg.Targets[1]["__meta_openstack_hypervisor_state"], model.LabelValue("up")) testutil.Equals(t, tg.Targets[1]["__meta_openstack_hypervisor_status"], model.LabelValue("enabled")) mock.TearDownSuite() } func TestOpenstackSDHypervisorRefreshWithDoneContext(t *testing.T) { mock := &OpenstackSDHypervisorTestSuite{} mock.SetupTest(t) hypervisor, _ := mock.openstackAuthSuccess() ctx, cancel := context.WithCancel(context.Background()) cancel() _, err := hypervisor.refresh(ctx) testutil.NotOk(t, err) testutil.Assert(t, strings.Contains(err.Error(), context.Canceled.Error()), "%q doesn't contain %q", err, context.Canceled) mock.TearDownSuite() } prometheus-2.15.2+ds/discovery/openstack/instance.go000066400000000000000000000150301360540074000225250ustar00rootroot00000000000000// Copyright 2017 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with 
the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package openstack import ( "context" "fmt" "net" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" "github.com/gophercloud/gophercloud" "github.com/gophercloud/gophercloud/openstack" "github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/floatingips" "github.com/gophercloud/gophercloud/openstack/compute/v2/servers" "github.com/gophercloud/gophercloud/pagination" "github.com/pkg/errors" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/util/strutil" ) const ( openstackLabelPrefix = model.MetaLabelPrefix + "openstack_" openstackLabelAddressPool = openstackLabelPrefix + "address_pool" openstackLabelInstanceFlavor = openstackLabelPrefix + "instance_flavor" openstackLabelInstanceID = openstackLabelPrefix + "instance_id" openstackLabelInstanceName = openstackLabelPrefix + "instance_name" openstackLabelInstanceStatus = openstackLabelPrefix + "instance_status" openstackLabelPrivateIP = openstackLabelPrefix + "private_ip" openstackLabelProjectID = openstackLabelPrefix + "project_id" openstackLabelPublicIP = openstackLabelPrefix + "public_ip" openstackLabelTagPrefix = openstackLabelPrefix + "tag_" openstackLabelUserID = openstackLabelPrefix + "user_id" ) // InstanceDiscovery discovers OpenStack instances. type InstanceDiscovery struct { provider *gophercloud.ProviderClient authOpts *gophercloud.AuthOptions region string logger log.Logger port int allTenants bool } // NewInstanceDiscovery returns a new instance discovery. 
func newInstanceDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions, port int, region string, allTenants bool, l log.Logger) *InstanceDiscovery { if l == nil { l = log.NewNopLogger() } return &InstanceDiscovery{provider: provider, authOpts: opts, region: region, port: port, allTenants: allTenants, logger: l} } type floatingIPKey struct { id string fixed string } func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { i.provider.Context = ctx err := openstack.Authenticate(i.provider, *i.authOpts) if err != nil { return nil, errors.Wrap(err, "could not authenticate to OpenStack") } client, err := openstack.NewComputeV2(i.provider, gophercloud.EndpointOpts{ Region: i.region, }) if err != nil { return nil, errors.Wrap(err, "could not create OpenStack compute session") } // OpenStack API reference // https://developer.openstack.org/api-ref/compute/#list-floating-ips pagerFIP := floatingips.List(client) floatingIPList := make(map[floatingIPKey]string) floatingIPPresent := make(map[string]struct{}) err = pagerFIP.EachPage(func(page pagination.Page) (bool, error) { result, err := floatingips.ExtractFloatingIPs(page) if err != nil { return false, errors.Wrap(err, "could not extract floatingips") } for _, ip := range result { // Skip not associated ips if ip.InstanceID == "" || ip.FixedIP == "" { continue } floatingIPList[floatingIPKey{id: ip.InstanceID, fixed: ip.FixedIP}] = ip.IP floatingIPPresent[ip.IP] = struct{}{} } return true, nil }) if err != nil { return nil, err } // OpenStack API reference // https://developer.openstack.org/api-ref/compute/#list-servers opts := servers.ListOpts{ AllTenants: i.allTenants, } pager := servers.List(client, opts) tg := &targetgroup.Group{ Source: fmt.Sprintf("OS_" + i.region), } err = pager.EachPage(func(page pagination.Page) (bool, error) { if ctx.Err() != nil { return false, errors.Wrap(ctx.Err(), "could not extract instances") } instanceList, err := servers.ExtractServers(page) if err != nil { return false, errors.Wrap(err, "could not extract instances") } for _, s := range instanceList { if len(s.Addresses) == 0 { level.Info(i.logger).Log("msg", "Got no IP address", "instance", s.ID) continue } labels := model.LabelSet{ openstackLabelInstanceID: model.LabelValue(s.ID), openstackLabelInstanceStatus: model.LabelValue(s.Status), openstackLabelInstanceName: model.LabelValue(s.Name), openstackLabelProjectID: model.LabelValue(s.TenantID), openstackLabelUserID: model.LabelValue(s.UserID), } id, ok := s.Flavor["id"].(string) if !ok { level.Warn(i.logger).Log("msg", "Invalid type for flavor id, expected string") continue } labels[openstackLabelInstanceFlavor] = model.LabelValue(id) for k, v := range s.Metadata { name := strutil.SanitizeLabelName(k) labels[openstackLabelTagPrefix+model.LabelName(name)] = model.LabelValue(v) } for pool, address := range s.Addresses { md, ok := address.([]interface{}) if !ok { level.Warn(i.logger).Log("msg", "Invalid type for address, expected array") continue } if len(md) == 0 { level.Debug(i.logger).Log("msg", "Got no IP address", "instance", s.ID) continue } for _, address := range md { md1, ok := address.(map[string]interface{}) if !ok { level.Warn(i.logger).Log("msg", "Invalid type for address, expected dict") continue } addr, ok := md1["addr"].(string) if !ok { level.Warn(i.logger).Log("msg", "Invalid type for address, expected string") continue } if _, ok := floatingIPPresent[addr]; ok { continue } lbls := make(model.LabelSet, len(labels)) for k, v := range labels { 
lbls[k] = v } lbls[openstackLabelAddressPool] = model.LabelValue(pool) lbls[openstackLabelPrivateIP] = model.LabelValue(addr) if val, ok := floatingIPList[floatingIPKey{id: s.ID, fixed: addr}]; ok { lbls[openstackLabelPublicIP] = model.LabelValue(val) } addr = net.JoinHostPort(addr, fmt.Sprintf("%d", i.port)) lbls[model.AddressLabel] = model.LabelValue(addr) tg.Targets = append(tg.Targets, lbls) } } } return true, nil }) if err != nil { return nil, err } return []*targetgroup.Group{tg}, nil } prometheus-2.15.2+ds/discovery/openstack/instance_test.go000066400000000000000000000130041360540074000235630ustar00rootroot00000000000000// Copyright 2017 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package openstack import ( "context" "fmt" "strings" "testing" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/util/testutil" ) type OpenstackSDInstanceTestSuite struct { Mock *SDMock } func (s *OpenstackSDInstanceTestSuite) TearDownSuite() { s.Mock.ShutdownServer() } func (s *OpenstackSDInstanceTestSuite) SetupTest(t *testing.T) { s.Mock = NewSDMock(t) s.Mock.Setup() s.Mock.HandleServerListSuccessfully() s.Mock.HandleFloatingIPListSuccessfully() s.Mock.HandleVersionsSuccessfully() s.Mock.HandleAuthSuccessfully() } func (s *OpenstackSDInstanceTestSuite) openstackAuthSuccess() (refresher, error) { conf := SDConfig{ IdentityEndpoint: s.Mock.Endpoint(), Password: "test", Username: "test", DomainName: "12345", Region: "RegionOne", Role: "instance", AllTenants: true, } return newRefresher(&conf, nil) } func TestOpenstackSDInstanceRefresh(t *testing.T) { mock := &OpenstackSDInstanceTestSuite{} mock.SetupTest(t) instance, err := mock.openstackAuthSuccess() testutil.Ok(t, err) ctx := context.Background() tgs, err := instance.refresh(ctx) testutil.Ok(t, err) testutil.Equals(t, 1, len(tgs)) tg := tgs[0] testutil.Assert(t, tg != nil, "") testutil.Assert(t, tg.Targets != nil, "") testutil.Equals(t, 4, len(tg.Targets)) for i, lbls := range []model.LabelSet{ { "__address__": model.LabelValue("10.0.0.32:0"), "__meta_openstack_instance_flavor": model.LabelValue("1"), "__meta_openstack_instance_id": model.LabelValue("ef079b0c-e610-4dfb-b1aa-b49f07ac48e5"), "__meta_openstack_instance_status": model.LabelValue("ACTIVE"), "__meta_openstack_instance_name": model.LabelValue("herp"), "__meta_openstack_private_ip": model.LabelValue("10.0.0.32"), "__meta_openstack_public_ip": model.LabelValue("10.10.10.2"), "__meta_openstack_address_pool": model.LabelValue("private"), "__meta_openstack_project_id": model.LabelValue("fcad67a6189847c4aecfa3c81a05783b"), "__meta_openstack_user_id": model.LabelValue("9349aff8be7545ac9d2f1d00999a23cd"), }, { "__address__": model.LabelValue("10.0.0.31:0"), "__meta_openstack_instance_flavor": model.LabelValue("1"), "__meta_openstack_instance_id": model.LabelValue("9e5476bd-a4ec-4653-93d6-72c93aa682ba"), "__meta_openstack_instance_status": model.LabelValue("ACTIVE"), "__meta_openstack_instance_name": model.LabelValue("derp"), "__meta_openstack_private_ip": 
model.LabelValue("10.0.0.31"), "__meta_openstack_address_pool": model.LabelValue("private"), "__meta_openstack_project_id": model.LabelValue("fcad67a6189847c4aecfa3c81a05783b"), "__meta_openstack_user_id": model.LabelValue("9349aff8be7545ac9d2f1d00999a23cd"), }, { "__address__": model.LabelValue("10.0.0.33:0"), "__meta_openstack_instance_flavor": model.LabelValue("4"), "__meta_openstack_instance_id": model.LabelValue("9e5476bd-a4ec-4653-93d6-72c93aa682bb"), "__meta_openstack_instance_status": model.LabelValue("ACTIVE"), "__meta_openstack_instance_name": model.LabelValue("merp"), "__meta_openstack_private_ip": model.LabelValue("10.0.0.33"), "__meta_openstack_address_pool": model.LabelValue("private"), "__meta_openstack_tag_env": model.LabelValue("prod"), "__meta_openstack_project_id": model.LabelValue("fcad67a6189847c4aecfa3c81a05783b"), "__meta_openstack_user_id": model.LabelValue("9349aff8be7545ac9d2f1d00999a23cd"), }, { "__address__": model.LabelValue("10.0.0.34:0"), "__meta_openstack_instance_flavor": model.LabelValue("4"), "__meta_openstack_instance_id": model.LabelValue("9e5476bd-a4ec-4653-93d6-72c93aa682bb"), "__meta_openstack_instance_status": model.LabelValue("ACTIVE"), "__meta_openstack_instance_name": model.LabelValue("merp"), "__meta_openstack_private_ip": model.LabelValue("10.0.0.34"), "__meta_openstack_address_pool": model.LabelValue("private"), "__meta_openstack_tag_env": model.LabelValue("prod"), "__meta_openstack_public_ip": model.LabelValue("10.10.10.4"), "__meta_openstack_project_id": model.LabelValue("fcad67a6189847c4aecfa3c81a05783b"), "__meta_openstack_user_id": model.LabelValue("9349aff8be7545ac9d2f1d00999a23cd"), }, } { t.Run(fmt.Sprintf("item %d", i), func(t *testing.T) { testutil.Equals(t, lbls, tg.Targets[i]) }) } mock.TearDownSuite() } func TestOpenstackSDInstanceRefreshWithDoneContext(t *testing.T) { mock := &OpenstackSDHypervisorTestSuite{} mock.SetupTest(t) hypervisor, _ := mock.openstackAuthSuccess() ctx, cancel := context.WithCancel(context.Background()) cancel() _, err := hypervisor.refresh(ctx) testutil.NotOk(t, err) testutil.Assert(t, strings.Contains(err.Error(), context.Canceled.Error()), "%q doesn't contain %q", err, context.Canceled) mock.TearDownSuite() } prometheus-2.15.2+ds/discovery/openstack/mock_test.go000066400000000000000000000442721360540074000227230ustar00rootroot00000000000000// Copyright 2017 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package openstack import ( "fmt" "net/http" "net/http/httptest" "testing" ) // SDMock is the interface for the OpenStack mock type SDMock struct { t *testing.T Server *httptest.Server Mux *http.ServeMux } // NewSDMock returns a new SDMock. 
func NewSDMock(t *testing.T) *SDMock { return &SDMock{ t: t, } } // Endpoint returns the URI to the mock server func (m *SDMock) Endpoint() string { return m.Server.URL + "/" } // Setup creates the mock server func (m *SDMock) Setup() { m.Mux = http.NewServeMux() m.Server = httptest.NewServer(m.Mux) } // ShutdownServer creates the mock server func (m *SDMock) ShutdownServer() { m.Server.Close() } const tokenID = "cbc36478b0bd8e67e89469c7749d4127" func testMethod(t *testing.T, r *http.Request, expected string) { if expected != r.Method { t.Errorf("Request method = %v, expected %v", r.Method, expected) } } func testHeader(t *testing.T, r *http.Request, header string, expected string) { if actual := r.Header.Get(header); expected != actual { t.Errorf("Header %s = %s, expected %s", header, actual, expected) } } // HandleVersionsSuccessfully mocks version call func (m *SDMock) HandleVersionsSuccessfully() { m.Mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { fmt.Fprintf(w, ` { "versions": { "values": [ { "status": "stable", "id": "v3.0", "links": [ { "href": "%s", "rel": "self" } ] }, { "status": "stable", "id": "v2.0", "links": [ { "href": "%s", "rel": "self" } ] } ] } } `, m.Endpoint()+"v3/", m.Endpoint()+"v2.0/") }) } // HandleAuthSuccessfully mocks auth call func (m *SDMock) HandleAuthSuccessfully() { m.Mux.HandleFunc("/v3/auth/tokens", func(w http.ResponseWriter, r *http.Request) { w.Header().Add("X-Subject-Token", tokenID) w.WriteHeader(http.StatusCreated) fmt.Fprintf(w, ` { "token": { "audit_ids": ["VcxU2JYqT8OzfUVvrjEITQ", "qNUTIJntTzO1-XUk5STybw"], "catalog": [ { "endpoints": [ { "id": "39dc322ce86c4111b4f06c2eeae0841b", "interface": "public", "region": "RegionOne", "url": "http://localhost:5000" }, { "id": "ec642f27474842e78bf059f6c48f4e99", "interface": "internal", "region": "RegionOne", "url": "http://localhost:5000" }, { "id": "c609fc430175452290b62a4242e8a7e8", "interface": "admin", "region": "RegionOne", "url": "http://localhost:35357" } ], "id": "4363ae44bdf34a3981fde3b823cb9aa2", "type": "identity", "name": "keystone" }, { "endpoints": [ { "id": "e2ffee808abc4a60916715b1d4b489dd", "interface": "public", "region": "RegionOne", "region_id": "RegionOne", "url": "%s" } ], "id": "b7f2a5b1a019459cb956e43a8cb41e31", "type": "compute" } ], "expires_at": "2013-02-27T18:30:59.999999Z", "is_domain": false, "issued_at": "2013-02-27T16:30:59.999999Z", "methods": [ "password" ], "project": { "domain": { "id": "1789d1", "name": "example.com" }, "id": "263fd9", "name": "project-x" }, "roles": [ { "id": "76e72a", "name": "admin" }, { "id": "f4f392", "name": "member" } ], "user": { "domain": { "id": "1789d1", "name": "example.com" }, "id": "0ca8f6", "name": "Joe", "password_expires_at": "2016-11-06T15:32:17.000000" } } } `, m.Endpoint()) }) } const hypervisorListBody = ` { "hypervisors": [ { "status": "enabled", "service": { "host": "nc14.cloud.com", "disabled_reason": null, "id": 16 }, "vcpus_used": 18, "hypervisor_type": "QEMU", "local_gb_used": 84, "vcpus": 24, "hypervisor_hostname": "nc14.cloud.com", "memory_mb_used": 24064, "memory_mb": 96484, "current_workload": 1, "state": "up", "host_ip": "172.16.70.14", "cpu_info": "{\"vendor\": \"Intel\", \"model\": \"IvyBridge\", \"arch\": \"x86_64\", \"features\": [\"pge\", \"avx\", \"clflush\", \"sep\", \"syscall\", \"vme\", \"dtes64\", \"msr\", \"fsgsbase\", \"xsave\", \"vmx\", \"erms\", \"xtpr\", \"cmov\", \"smep\", \"ssse3\", \"est\", \"pat\", \"monitor\", \"smx\", \"pbe\", \"lm\", \"tsc\", \"nx\", \"fxsr\", \"tm\", \"sse4.1\", 
\"pae\", \"sse4.2\", \"pclmuldq\", \"acpi\", \"tsc-deadline\", \"mmx\", \"osxsave\", \"cx8\", \"mce\", \"de\", \"tm2\", \"ht\", \"dca\", \"lahf_lm\", \"popcnt\", \"mca\", \"pdpe1gb\", \"apic\", \"sse\", \"f16c\", \"pse\", \"ds\", \"invtsc\", \"pni\", \"rdtscp\", \"aes\", \"sse2\", \"ss\", \"ds_cpl\", \"pcid\", \"fpu\", \"cx16\", \"pse36\", \"mtrr\", \"pdcm\", \"rdrand\", \"x2apic\"], \"topology\": {\"cores\": 6, \"cells\": 2, \"threads\": 2, \"sockets\": 1}}", "running_vms": 10, "free_disk_gb": 315, "hypervisor_version": 2003000, "disk_available_least": 304, "local_gb": 399, "free_ram_mb": 72420, "id": 1 }, { "status": "enabled", "service": { "host": "cc13.cloud.com", "disabled_reason": null, "id": 17 }, "vcpus_used": 1, "hypervisor_type": "QEMU", "local_gb_used": 20, "vcpus": 24, "hypervisor_hostname": "cc13.cloud.com", "memory_mb_used": 2560, "memory_mb": 96484, "current_workload": 0, "state": "up", "host_ip": "172.16.70.13", "cpu_info": "{\"vendor\": \"Intel\", \"model\": \"IvyBridge\", \"arch\": \"x86_64\", \"features\": [\"pge\", \"avx\", \"clflush\", \"sep\", \"syscall\", \"vme\", \"dtes64\", \"msr\", \"fsgsbase\", \"xsave\", \"vmx\", \"erms\", \"xtpr\", \"cmov\", \"smep\", \"ssse3\", \"est\", \"pat\", \"monitor\", \"smx\", \"pbe\", \"lm\", \"tsc\", \"nx\", \"fxsr\", \"tm\", \"sse4.1\", \"pae\", \"sse4.2\", \"pclmuldq\", \"acpi\", \"tsc-deadline\", \"mmx\", \"osxsave\", \"cx8\", \"mce\", \"de\", \"tm2\", \"ht\", \"dca\", \"lahf_lm\", \"popcnt\", \"mca\", \"pdpe1gb\", \"apic\", \"sse\", \"f16c\", \"pse\", \"ds\", \"invtsc\", \"pni\", \"rdtscp\", \"aes\", \"sse2\", \"ss\", \"ds_cpl\", \"pcid\", \"fpu\", \"cx16\", \"pse36\", \"mtrr\", \"pdcm\", \"rdrand\", \"x2apic\"], \"topology\": {\"cores\": 6, \"cells\": 2, \"threads\": 2, \"sockets\": 1}}", "running_vms": 0, "free_disk_gb": 379, "hypervisor_version": 2003000, "disk_available_least": 384, "local_gb": 399, "free_ram_mb": 93924, "id": 721 } ] }` // HandleHypervisorListSuccessfully mocks os-hypervisors detail call func (m *SDMock) HandleHypervisorListSuccessfully() { m.Mux.HandleFunc("/os-hypervisors/detail", func(w http.ResponseWriter, r *http.Request) { testMethod(m.t, r, "GET") testHeader(m.t, r, "X-Auth-Token", tokenID) w.Header().Add("Content-Type", "application/json") fmt.Fprint(w, hypervisorListBody) }) } const serverListBody = ` { "servers": [ { "status": "ERROR", "updated": "2014-09-25T13:10:10Z", "hostId": "29d3c8c896a45aa4c34e52247875d7fefc3d94bbcc9f622b5d204362", "OS-EXT-SRV-ATTR:host": "devstack", "addresses": {}, "links": [ { "href": "http://104.130.131.164:8774/v2/fcad67a6189847c4aecfa3c81a05783b/servers/af9bcad9-3c87-477d-9347-b291eabf480e", "rel": "self" }, { "href": "http://104.130.131.164:8774/fcad67a6189847c4aecfa3c81a05783b/servers/af9bcad9-3c87-477d-9347-b291eabf480e", "rel": "bookmark" } ], "key_name": null, "image": { "id": "f90f6034-2570-4974-8351-6b49732ef2eb", "links": [ { "href": "http://104.130.131.164:8774/fcad67a6189847c4aecfa3c81a05783b/images/f90f6034-2570-4974-8351-6b49732ef2eb", "rel": "bookmark" } ] }, "OS-EXT-STS:task_state": null, "OS-EXT-STS:vm_state": "error", "OS-EXT-SRV-ATTR:instance_name": "instance-00000010", "OS-SRV-USG:launched_at": "2014-09-25T13:10:10.000000", "OS-EXT-SRV-ATTR:hypervisor_hostname": "devstack", "flavor": { "id": "1", "links": [ { "href": "http://104.130.131.164:8774/fcad67a6189847c4aecfa3c81a05783b/flavors/1", "rel": "bookmark" } ] }, "id": "af9bcad9-3c87-477d-9347-b291eabf480e", "security_groups": [ { "name": "default" } ], "OS-SRV-USG:terminated_at": null, 
"OS-EXT-AZ:availability_zone": "nova", "user_id": "9349aff8be7545ac9d2f1d00999a23cd", "name": "herp2", "created": "2014-09-25T13:10:02Z", "tenant_id": "fcad67a6189847c4aecfa3c81a05783b", "OS-DCF:diskConfig": "MANUAL", "os-extended-volumes:volumes_attached": [], "accessIPv4": "", "accessIPv6": "", "progress": 0, "OS-EXT-STS:power_state": 1, "config_drive": "", "metadata": {} }, { "status": "ACTIVE", "updated": "2014-09-25T13:10:10Z", "hostId": "29d3c8c896a45aa4c34e52247875d7fefc3d94bbcc9f622b5d204362", "OS-EXT-SRV-ATTR:host": "devstack", "addresses": { "private": [ { "OS-EXT-IPS-MAC:mac_addr": "fa:16:3e:7c:1b:2b", "version": 4, "addr": "10.0.0.32", "OS-EXT-IPS:type": "fixed" }, { "version": 4, "addr": "10.10.10.2", "OS-EXT-IPS:type": "floating" } ] }, "links": [ { "href": "http://104.130.131.164:8774/v2/fcad67a6189847c4aecfa3c81a05783b/servers/ef079b0c-e610-4dfb-b1aa-b49f07ac48e5", "rel": "self" }, { "href": "http://104.130.131.164:8774/fcad67a6189847c4aecfa3c81a05783b/servers/ef079b0c-e610-4dfb-b1aa-b49f07ac48e5", "rel": "bookmark" } ], "key_name": null, "image": { "id": "f90f6034-2570-4974-8351-6b49732ef2eb", "links": [ { "href": "http://104.130.131.164:8774/fcad67a6189847c4aecfa3c81a05783b/images/f90f6034-2570-4974-8351-6b49732ef2eb", "rel": "bookmark" } ] }, "OS-EXT-STS:task_state": null, "OS-EXT-STS:vm_state": "active", "OS-EXT-SRV-ATTR:instance_name": "instance-0000001e", "OS-SRV-USG:launched_at": "2014-09-25T13:10:10.000000", "OS-EXT-SRV-ATTR:hypervisor_hostname": "devstack", "flavor": { "id": "1", "links": [ { "href": "http://104.130.131.164:8774/fcad67a6189847c4aecfa3c81a05783b/flavors/1", "rel": "bookmark" } ] }, "id": "ef079b0c-e610-4dfb-b1aa-b49f07ac48e5", "security_groups": [ { "name": "default" } ], "OS-SRV-USG:terminated_at": null, "OS-EXT-AZ:availability_zone": "nova", "user_id": "9349aff8be7545ac9d2f1d00999a23cd", "name": "herp", "created": "2014-09-25T13:10:02Z", "tenant_id": "fcad67a6189847c4aecfa3c81a05783b", "OS-DCF:diskConfig": "MANUAL", "os-extended-volumes:volumes_attached": [], "accessIPv4": "", "accessIPv6": "", "progress": 0, "OS-EXT-STS:power_state": 1, "config_drive": "", "metadata": {} }, { "status": "ACTIVE", "updated": "2014-09-25T13:04:49Z", "hostId": "29d3c8c896a45aa4c34e52247875d7fefc3d94bbcc9f622b5d204362", "OS-EXT-SRV-ATTR:host": "devstack", "addresses": { "private": [ { "OS-EXT-IPS-MAC:mac_addr": "fa:16:3e:9e:89:be", "version": 4, "addr": "10.0.0.31", "OS-EXT-IPS:type": "fixed" } ] }, "links": [ { "href": "http://104.130.131.164:8774/v2/fcad67a6189847c4aecfa3c81a05783b/servers/9e5476bd-a4ec-4653-93d6-72c93aa682ba", "rel": "self" }, { "href": "http://104.130.131.164:8774/fcad67a6189847c4aecfa3c81a05783b/servers/9e5476bd-a4ec-4653-93d6-72c93aa682ba", "rel": "bookmark" } ], "key_name": null, "image": { "id": "f90f6034-2570-4974-8351-6b49732ef2eb", "links": [ { "href": "http://104.130.131.164:8774/fcad67a6189847c4aecfa3c81a05783b/images/f90f6034-2570-4974-8351-6b49732ef2eb", "rel": "bookmark" } ] }, "OS-EXT-STS:task_state": null, "OS-EXT-STS:vm_state": "active", "OS-EXT-SRV-ATTR:instance_name": "instance-0000001d", "OS-SRV-USG:launched_at": "2014-09-25T13:04:49.000000", "OS-EXT-SRV-ATTR:hypervisor_hostname": "devstack", "flavor": { "id": "1", "links": [ { "href": "http://104.130.131.164:8774/fcad67a6189847c4aecfa3c81a05783b/flavors/1", "rel": "bookmark" } ] }, "id": "9e5476bd-a4ec-4653-93d6-72c93aa682ba", "security_groups": [ { "name": "default" } ], "OS-SRV-USG:terminated_at": null, "OS-EXT-AZ:availability_zone": "nova", "user_id": 
"9349aff8be7545ac9d2f1d00999a23cd", "name": "derp", "created": "2014-09-25T13:04:41Z", "tenant_id": "fcad67a6189847c4aecfa3c81a05783b", "OS-DCF:diskConfig": "MANUAL", "os-extended-volumes:volumes_attached": [], "accessIPv4": "", "accessIPv6": "", "progress": 0, "OS-EXT-STS:power_state": 1, "config_drive": "", "metadata": {} }, { "status": "ACTIVE", "updated": "2014-09-25T13:04:49Z", "hostId": "29d3c8c896a45aa4c34e52247875d7fefc3d94bbcc9f622b5d204362", "OS-EXT-SRV-ATTR:host": "devstack", "addresses": { "private": [ { "version": 4, "addr": "10.0.0.33", "OS-EXT-IPS:type": "fixed" }, { "version": 4, "addr": "10.0.0.34", "OS-EXT-IPS:type": "fixed" }, { "version": 4, "addr": "10.10.10.4", "OS-EXT-IPS:type": "floating" } ] }, "links": [ { "href": "http://104.130.131.164:8774/v2/fcad67a6189847c4aecfa3c81a05783b/servers/9e5476bd-a4ec-4653-93d6-72c93aa682ba", "rel": "self" }, { "href": "http://104.130.131.164:8774/fcad67a6189847c4aecfa3c81a05783b/servers/9e5476bd-a4ec-4653-93d6-72c93aa682ba", "rel": "bookmark" } ], "key_name": null, "image": "", "OS-EXT-STS:task_state": null, "OS-EXT-STS:vm_state": "active", "OS-EXT-SRV-ATTR:instance_name": "instance-0000001d", "OS-SRV-USG:launched_at": "2014-09-25T13:04:49.000000", "OS-EXT-SRV-ATTR:hypervisor_hostname": "devstack", "flavor": { "id": "4", "links": [ { "href": "http://104.130.131.164:8774/fcad67a6189847c4aecfa3c81a05783b/flavors/1", "rel": "bookmark" } ] }, "id": "9e5476bd-a4ec-4653-93d6-72c93aa682bb", "security_groups": [ { "name": "default" } ], "OS-SRV-USG:terminated_at": null, "OS-EXT-AZ:availability_zone": "nova", "user_id": "9349aff8be7545ac9d2f1d00999a23cd", "name": "merp", "created": "2014-09-25T13:04:41Z", "tenant_id": "fcad67a6189847c4aecfa3c81a05783b", "OS-DCF:diskConfig": "MANUAL", "os-extended-volumes:volumes_attached": [], "accessIPv4": "", "accessIPv6": "", "progress": 0, "OS-EXT-STS:power_state": 1, "config_drive": "", "metadata": { "env": "prod" } } ] } ` // HandleServerListSuccessfully mocks server detail call func (m *SDMock) HandleServerListSuccessfully() { m.Mux.HandleFunc("/servers/detail", func(w http.ResponseWriter, r *http.Request) { testMethod(m.t, r, "GET") testHeader(m.t, r, "X-Auth-Token", tokenID) w.Header().Add("Content-Type", "application/json") fmt.Fprint(w, serverListBody) }) } const listOutput = ` { "floating_ips": [ { "fixed_ip": null, "id": "1", "instance_id": null, "ip": "10.10.10.1", "pool": "nova" }, { "fixed_ip": "10.0.0.32", "id": "2", "instance_id": "ef079b0c-e610-4dfb-b1aa-b49f07ac48e5", "ip": "10.10.10.2", "pool": "nova" }, { "fixed_ip": "10.0.0.34", "id": "3", "instance_id": "9e5476bd-a4ec-4653-93d6-72c93aa682bb", "ip": "10.10.10.4", "pool": "nova" } ] } ` // HandleFloatingIPListSuccessfully mocks floating ips call func (m *SDMock) HandleFloatingIPListSuccessfully() { m.Mux.HandleFunc("/os-floating-ips", func(w http.ResponseWriter, r *http.Request) { testMethod(m.t, r, "GET") testHeader(m.t, r, "X-Auth-Token", tokenID) w.Header().Add("Content-Type", "application/json") fmt.Fprint(w, listOutput) }) } prometheus-2.15.2+ds/discovery/openstack/openstack.go000066400000000000000000000135171360540074000227200ustar00rootroot00000000000000// Copyright 2017 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package openstack import ( "context" "net/http" "time" "github.com/go-kit/kit/log" "github.com/gophercloud/gophercloud" "github.com/gophercloud/gophercloud/openstack" conntrack "github.com/mwitkow/go-conntrack" "github.com/pkg/errors" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/refresh" "github.com/prometheus/prometheus/discovery/targetgroup" ) // DefaultSDConfig is the default OpenStack SD configuration. var DefaultSDConfig = SDConfig{ Port: 80, RefreshInterval: model.Duration(60 * time.Second), } // SDConfig is the configuration for OpenStack based service discovery. type SDConfig struct { IdentityEndpoint string `yaml:"identity_endpoint"` Username string `yaml:"username"` UserID string `yaml:"userid"` Password config_util.Secret `yaml:"password"` ProjectName string `yaml:"project_name"` ProjectID string `yaml:"project_id"` DomainName string `yaml:"domain_name"` DomainID string `yaml:"domain_id"` ApplicationCredentialName string `yaml:"application_credential_name"` ApplicationCredentialID string `yaml:"application_credential_id"` ApplicationCredentialSecret config_util.Secret `yaml:"application_credential_secret"` Role Role `yaml:"role"` Region string `yaml:"region"` RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` Port int `yaml:"port"` AllTenants bool `yaml:"all_tenants,omitempty"` TLSConfig config_util.TLSConfig `yaml:"tls_config,omitempty"` } // Role is the role of the target in OpenStack. type Role string // The valid options for OpenStackRole. const ( // OpenStack document reference // https://docs.openstack.org/nova/pike/admin/arch.html#hypervisors OpenStackRoleHypervisor Role = "hypervisor" // OpenStack document reference // https://docs.openstack.org/horizon/pike/user/launch-instances.html OpenStackRoleInstance Role = "instance" ) // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *Role) UnmarshalYAML(unmarshal func(interface{}) error) error { if err := unmarshal((*string)(c)); err != nil { return err } switch *c { case OpenStackRoleHypervisor, OpenStackRoleInstance: return nil default: return errors.Errorf("unknown OpenStack SD role %q", *c) } } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = DefaultSDConfig type plain SDConfig err := unmarshal((*plain)(c)) if err != nil { return err } if c.Role == "" { return errors.New("role missing (one of: instance, hypervisor)") } if c.Region == "" { return errors.New("openstack SD configuration requires a region") } return nil } type refresher interface { refresh(context.Context) ([]*targetgroup.Group, error) } // NewDiscovery returns a new OpenStack Discoverer which periodically refreshes its targets. 
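//
// A minimal usage sketch. The endpoint, credentials and the surrounding
// wiring below are illustrative assumptions, not values defined in this
// package:
//
//	conf := &SDConfig{
//		IdentityEndpoint: "https://keystone.example.com:5000/v3", // hypothetical endpoint
//		Username:         "prometheus",
//		Password:         "secret",
//		DomainName:       "Default",
//		Region:           "RegionOne",
//		Role:             OpenStackRoleInstance,
//		Port:             9100,
//		RefreshInterval:  model.Duration(60 * time.Second),
//	}
//	disc, err := NewDiscovery(conf, log.NewNopLogger())
//	if err != nil {
//		// handle the configuration error
//	}
//	ch := make(chan []*targetgroup.Group)
//	go disc.Run(ctx, ch) // ctx is a caller-supplied context.Context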
func NewDiscovery(conf *SDConfig, l log.Logger) (*refresh.Discovery, error) { r, err := newRefresher(conf, l) if err != nil { return nil, err } return refresh.NewDiscovery( l, "openstack", time.Duration(conf.RefreshInterval), r.refresh, ), nil } func newRefresher(conf *SDConfig, l log.Logger) (refresher, error) { var opts gophercloud.AuthOptions if conf.IdentityEndpoint == "" { var err error opts, err = openstack.AuthOptionsFromEnv() if err != nil { return nil, err } } else { opts = gophercloud.AuthOptions{ IdentityEndpoint: conf.IdentityEndpoint, Username: conf.Username, UserID: conf.UserID, Password: string(conf.Password), TenantName: conf.ProjectName, TenantID: conf.ProjectID, DomainName: conf.DomainName, DomainID: conf.DomainID, ApplicationCredentialID: conf.ApplicationCredentialID, ApplicationCredentialName: conf.ApplicationCredentialName, ApplicationCredentialSecret: string(conf.ApplicationCredentialSecret), } } client, err := openstack.NewClient(opts.IdentityEndpoint) if err != nil { return nil, err } tls, err := config_util.NewTLSConfig(&conf.TLSConfig) if err != nil { return nil, err } client.HTTPClient = http.Client{ Transport: &http.Transport{ IdleConnTimeout: 5 * time.Duration(conf.RefreshInterval), TLSClientConfig: tls, DialContext: conntrack.NewDialContextFunc( conntrack.DialWithTracing(), conntrack.DialWithName("openstack_sd"), ), }, Timeout: 5 * time.Duration(conf.RefreshInterval), } switch conf.Role { case OpenStackRoleHypervisor: return newHypervisorDiscovery(client, &opts, conf.Port, conf.Region, l), nil case OpenStackRoleInstance: return newInstanceDiscovery(client, &opts, conf.Port, conf.Region, conf.AllTenants, l), nil } return nil, errors.New("unknown OpenStack discovery role") } prometheus-2.15.2+ds/discovery/refresh/000077500000000000000000000000001360540074000200425ustar00rootroot00000000000000prometheus-2.15.2+ds/discovery/refresh/refresh.go000066400000000000000000000061751360540074000220400ustar00rootroot00000000000000// Copyright 2019 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package refresh import ( "context" "time" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/prometheus/discovery/targetgroup" ) var ( failuresCount = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "prometheus_sd_refresh_failures_total", Help: "Number of refresh failures for the given SD mechanism.", }, []string{"mechanism"}, ) duration = prometheus.NewSummaryVec( prometheus.SummaryOpts{ Name: "prometheus_sd_refresh_duration_seconds", Help: "The duration of a refresh in seconds for the given SD mechanism.", Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, }, []string{"mechanism"}, ) ) func init() { prometheus.MustRegister(duration, failuresCount) } // Discovery implements the Discoverer interface. 
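//
// Concrete SD mechanisms in this repository either embed a *Discovery and
// hand their refresh function to NewDiscovery (as the Triton implementation
// does), or simply return the *Discovery from their constructor (as the
// OpenStack implementation does). An illustrative sketch of the embedding
// pattern; the "example" names are hypothetical:
//
//	type exampleDiscovery struct {
//		*Discovery
//	}
//
//	func newExampleDiscovery(l log.Logger, interval time.Duration) *exampleDiscovery {
//		d := &exampleDiscovery{}
//		d.Discovery = NewDiscovery(l, "example", interval, d.refresh)
//		return d
//	}
//
//	func (d *exampleDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
//		return []*targetgroup.Group{{Source: "example"}}, nil
//	}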
type Discovery struct { logger log.Logger interval time.Duration refreshf func(ctx context.Context) ([]*targetgroup.Group, error) failures prometheus.Counter duration prometheus.Observer } // NewDiscovery returns a Discoverer function that calls a refresh() function at every interval. func NewDiscovery(l log.Logger, mech string, interval time.Duration, refreshf func(ctx context.Context) ([]*targetgroup.Group, error)) *Discovery { if l == nil { l = log.NewNopLogger() } return &Discovery{ logger: l, interval: interval, refreshf: refreshf, failures: failuresCount.WithLabelValues(mech), duration: duration.WithLabelValues(mech), } } // Run implements the Discoverer interface. func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { // Get an initial set right away. tgs, err := d.refresh(ctx) if err != nil { if ctx.Err() != context.Canceled { level.Error(d.logger).Log("msg", "Unable to refresh target groups", "err", err.Error()) } } else { select { case ch <- tgs: case <-ctx.Done(): return } } ticker := time.NewTicker(d.interval) defer ticker.Stop() for { select { case <-ticker.C: tgs, err := d.refresh(ctx) if err != nil { if ctx.Err() != context.Canceled { level.Error(d.logger).Log("msg", "Unable to refresh target groups", "err", err.Error()) } continue } select { case ch <- tgs: case <-ctx.Done(): return } case <-ctx.Done(): return } } } func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { now := time.Now() defer d.duration.Observe(time.Since(now).Seconds()) tgs, err := d.refreshf(ctx) if err != nil { d.failures.Inc() } return tgs, err } prometheus-2.15.2+ds/discovery/refresh/refresh_test.go000066400000000000000000000034701360540074000230720ustar00rootroot00000000000000// Copyright 2019 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package refresh import ( "context" "fmt" "testing" "time" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/util/testutil" ) func TestRefresh(t *testing.T) { tg1 := []*targetgroup.Group{ { Source: "tg", Targets: []model.LabelSet{ { model.LabelName("t1"): model.LabelValue("v1"), }, { model.LabelName("t2"): model.LabelValue("v2"), }, }, Labels: model.LabelSet{ model.LabelName("l1"): model.LabelValue("lv1"), }, }, } tg2 := []*targetgroup.Group{ { Source: "tg", }, } var i int refresh := func(ctx context.Context) ([]*targetgroup.Group, error) { i++ switch i { case 1: return tg1, nil case 2: return tg2, nil } return nil, fmt.Errorf("some error") } interval := time.Millisecond d := NewDiscovery(nil, "test", interval, refresh) ch := make(chan []*targetgroup.Group) ctx, cancel := context.WithCancel(context.Background()) defer cancel() go d.Run(ctx, ch) tg := <-ch testutil.Equals(t, tg1, tg) tg = <-ch testutil.Equals(t, tg2, tg) tick := time.NewTicker(2 * interval) defer tick.Stop() select { case <-ch: t.Fatal("Unexpected target group") case <-tick.C: } } prometheus-2.15.2+ds/discovery/targetgroup/000077500000000000000000000000001360540074000207475ustar00rootroot00000000000000prometheus-2.15.2+ds/discovery/targetgroup/targetgroup.go000066400000000000000000000052051360540074000236430ustar00rootroot00000000000000// Copyright 2013 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package targetgroup import ( "bytes" "encoding/json" "github.com/prometheus/common/model" ) // Group is a set of targets with a common label set(production , test, staging etc.). type Group struct { // Targets is a list of targets identified by a label set. Each target is // uniquely identifiable in the group by its address label. Targets []model.LabelSet // Labels is a set of labels that is common across all targets in the group. Labels model.LabelSet // Source is an identifier that describes a group of targets. Source string } func (tg Group) String() string { return tg.Source } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (tg *Group) UnmarshalYAML(unmarshal func(interface{}) error) error { g := struct { Targets []string `yaml:"targets"` Labels model.LabelSet `yaml:"labels"` }{} if err := unmarshal(&g); err != nil { return err } tg.Targets = make([]model.LabelSet, 0, len(g.Targets)) for _, t := range g.Targets { tg.Targets = append(tg.Targets, model.LabelSet{ model.AddressLabel: model.LabelValue(t), }) } tg.Labels = g.Labels return nil } // MarshalYAML implements the yaml.Marshaler interface. func (tg Group) MarshalYAML() (interface{}, error) { g := &struct { Targets []string `yaml:"targets"` Labels model.LabelSet `yaml:"labels,omitempty"` }{ Targets: make([]string, 0, len(tg.Targets)), Labels: tg.Labels, } for _, t := range tg.Targets { g.Targets = append(g.Targets, string(t[model.AddressLabel])) } return g, nil } // UnmarshalJSON implements the json.Unmarshaler interface. 
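//
// The accepted payload is an object with a list of target addresses and an
// optional label set, for example (the same shape exercised by the tests in
// this package):
//
//	{"targets": ["localhost:9090", "localhost:9091"], "labels": {"my": "label"}}
//
// Unknown fields are rejected because the decoder runs with
// DisallowUnknownFields enabled.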
func (tg *Group) UnmarshalJSON(b []byte) error { g := struct { Targets []string `json:"targets"` Labels model.LabelSet `json:"labels"` }{} dec := json.NewDecoder(bytes.NewReader(b)) dec.DisallowUnknownFields() if err := dec.Decode(&g); err != nil { return err } tg.Targets = make([]model.LabelSet, 0, len(g.Targets)) for _, t := range g.Targets { tg.Targets = append(tg.Targets, model.LabelSet{ model.AddressLabel: model.LabelValue(t), }) } tg.Labels = g.Labels return nil } prometheus-2.15.2+ds/discovery/targetgroup/targetgroup_test.go000066400000000000000000000107021360540074000247000ustar00rootroot00000000000000// Copyright 2018 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package targetgroup import ( "errors" "testing" "github.com/prometheus/common/model" "gopkg.in/yaml.v2" "github.com/prometheus/prometheus/util/testutil" ) func TestTargetGroupStrictJsonUnmarshal(t *testing.T) { tests := []struct { json string expectedReply error expectedGroup Group }{ { json: ` {"labels": {},"targets": []}`, expectedReply: nil, expectedGroup: Group{Targets: []model.LabelSet{}, Labels: model.LabelSet{}}, }, { json: ` {"labels": {"my":"label"},"targets": ["localhost:9090","localhost:9091"]}`, expectedReply: nil, expectedGroup: Group{Targets: []model.LabelSet{ model.LabelSet{"__address__": "localhost:9090"}, model.LabelSet{"__address__": "localhost:9091"}}, Labels: model.LabelSet{"my": "label"}}, }, { json: ` {"label": {},"targets": []}`, expectedReply: errors.New("json: unknown field \"label\""), }, { json: ` {"labels": {},"target": []}`, expectedReply: errors.New("json: unknown field \"target\""), }, } for _, test := range tests { tg := Group{} actual := tg.UnmarshalJSON([]byte(test.json)) testutil.Equals(t, test.expectedReply, actual) testutil.Equals(t, test.expectedGroup, tg) } } func TestTargetGroupYamlMarshal(t *testing.T) { marshal := func(g interface{}) []byte { d, err := yaml.Marshal(g) if err != nil { panic(err) } return d } tests := []struct { expectedYaml string expectetedErr error group Group }{ { // labels should be omitted if empty. group: Group{}, expectedYaml: "targets: []\n", expectetedErr: nil, }, { // targets only exposes addresses. group: Group{Targets: []model.LabelSet{ model.LabelSet{"__address__": "localhost:9090"}, model.LabelSet{"__address__": "localhost:9091"}}, Labels: model.LabelSet{"foo": "bar", "bar": "baz"}}, expectedYaml: "targets:\n- localhost:9090\n- localhost:9091\nlabels:\n bar: baz\n foo: bar\n", expectetedErr: nil, }, } for _, test := range tests { actual, err := test.group.MarshalYAML() testutil.Equals(t, test.expectetedErr, err) testutil.Equals(t, test.expectedYaml, string(marshal(actual))) } } func TestTargetGroupYamlUnmarshal(t *testing.T) { unmarshal := func(d []byte) func(interface{}) error { return func(o interface{}) error { return yaml.Unmarshal(d, o) } } tests := []struct { yaml string expectedGroup Group expectedReply error }{ { // empty target group. 
yaml: "labels:\ntargets:\n", expectedGroup: Group{Targets: []model.LabelSet{}}, expectedReply: nil, }, { // brackets syntax. yaml: "labels:\n my: label\ntargets:\n ['localhost:9090', 'localhost:9191']", expectedReply: nil, expectedGroup: Group{Targets: []model.LabelSet{ model.LabelSet{"__address__": "localhost:9090"}, model.LabelSet{"__address__": "localhost:9191"}}, Labels: model.LabelSet{"my": "label"}}, }, { // incorrect syntax. yaml: "labels:\ntargets:\n 'localhost:9090'", expectedReply: &yaml.TypeError{Errors: []string{"line 3: cannot unmarshal !!str `localho...` into []string"}}, }, } for _, test := range tests { tg := Group{} actual := tg.UnmarshalYAML(unmarshal([]byte(test.yaml))) testutil.Equals(t, test.expectedReply, actual) testutil.Equals(t, test.expectedGroup, tg) } } func TestString(t *testing.T) { // String() should return only the source, regardless of other attributes. group1 := Group{Targets: []model.LabelSet{ model.LabelSet{"__address__": "localhost:9090"}, model.LabelSet{"__address__": "localhost:9091"}}, Source: "", Labels: model.LabelSet{"foo": "bar", "bar": "baz"}} group2 := Group{Targets: []model.LabelSet{}, Source: "", Labels: model.LabelSet{}} testutil.Equals(t, "", group1.String()) testutil.Equals(t, "", group2.String()) testutil.Equals(t, group1.String(), group2.String()) } prometheus-2.15.2+ds/discovery/triton/000077500000000000000000000000001360540074000177235ustar00rootroot00000000000000prometheus-2.15.2+ds/discovery/triton/triton.go000066400000000000000000000136401360540074000215750ustar00rootroot00000000000000// Copyright 2017 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package triton import ( "context" "encoding/json" "fmt" "io" "io/ioutil" "net/http" "net/url" "strings" "time" "github.com/go-kit/kit/log" conntrack "github.com/mwitkow/go-conntrack" "github.com/pkg/errors" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/refresh" "github.com/prometheus/prometheus/discovery/targetgroup" ) const ( tritonLabel = model.MetaLabelPrefix + "triton_" tritonLabelGroups = tritonLabel + "groups" tritonLabelMachineID = tritonLabel + "machine_id" tritonLabelMachineAlias = tritonLabel + "machine_alias" tritonLabelMachineBrand = tritonLabel + "machine_brand" tritonLabelMachineImage = tritonLabel + "machine_image" tritonLabelServerID = tritonLabel + "server_id" ) // DefaultSDConfig is the default Triton SD configuration. var DefaultSDConfig = SDConfig{ Port: 9163, RefreshInterval: model.Duration(60 * time.Second), Version: 1, } // SDConfig is the configuration for Triton based service discovery. 
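//
// The yaml tags below correspond to the keys of a Triton SD block in the
// Prometheus configuration. An illustrative (non-authoritative) example with
// assumed values; only port, refresh_interval and version reflect the
// defaults defined above:
//
//	account: myaccount
//	dns_suffix: triton.example.com
//	endpoint: cmon.triton.example.com
//	groups:
//	  - production
//	port: 9163
//	refresh_interval: 60s
//	version: 1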
type SDConfig struct { Account string `yaml:"account"` DNSSuffix string `yaml:"dns_suffix"` Endpoint string `yaml:"endpoint"` Groups []string `yaml:"groups,omitempty"` Port int `yaml:"port"` RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` TLSConfig config_util.TLSConfig `yaml:"tls_config,omitempty"` Version int `yaml:"version"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = DefaultSDConfig type plain SDConfig err := unmarshal((*plain)(c)) if err != nil { return err } if c.Account == "" { return errors.New("triton SD configuration requires an account") } if c.DNSSuffix == "" { return errors.New("triton SD configuration requires a dns_suffix") } if c.Endpoint == "" { return errors.New("triton SD configuration requires an endpoint") } if c.RefreshInterval <= 0 { return errors.New("triton SD configuration requires RefreshInterval to be a positive integer") } return nil } // DiscoveryResponse models a JSON response from the Triton discovery. type discoveryResponse struct { Containers []struct { Groups []string `json:"groups"` ServerUUID string `json:"server_uuid"` VMAlias string `json:"vm_alias"` VMBrand string `json:"vm_brand"` VMImageUUID string `json:"vm_image_uuid"` VMUUID string `json:"vm_uuid"` } `json:"containers"` } // Discovery periodically performs Triton-SD requests. It implements // the Discoverer interface. type Discovery struct { *refresh.Discovery client *http.Client interval time.Duration sdConfig *SDConfig } // New returns a new Discovery which periodically refreshes its targets. func New(logger log.Logger, conf *SDConfig) (*Discovery, error) { tls, err := config_util.NewTLSConfig(&conf.TLSConfig) if err != nil { return nil, err } transport := &http.Transport{ TLSClientConfig: tls, DialContext: conntrack.NewDialContextFunc( conntrack.DialWithTracing(), conntrack.DialWithName("triton_sd"), ), } client := &http.Client{Transport: transport} d := &Discovery{ client: client, interval: time.Duration(conf.RefreshInterval), sdConfig: conf, } d.Discovery = refresh.NewDiscovery( logger, "triton", time.Duration(conf.RefreshInterval), d.refresh, ) return d, nil } func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { var endpoint = fmt.Sprintf("https://%s:%d/v%d/discover", d.sdConfig.Endpoint, d.sdConfig.Port, d.sdConfig.Version) if len(d.sdConfig.Groups) > 0 { groups := url.QueryEscape(strings.Join(d.sdConfig.Groups, ",")) endpoint = fmt.Sprintf("%s?groups=%s", endpoint, groups) } tg := &targetgroup.Group{ Source: endpoint, } req, err := http.NewRequest("GET", endpoint, nil) if err != nil { return nil, err } req = req.WithContext(ctx) resp, err := d.client.Do(req) if err != nil { return nil, errors.Wrap(err, "an error occurred when requesting targets from the discovery endpoint") } defer func() { io.Copy(ioutil.Discard, resp.Body) resp.Body.Close() }() data, err := ioutil.ReadAll(resp.Body) if err != nil { return nil, errors.Wrap(err, "an error occurred when reading the response body") } dr := discoveryResponse{} err = json.Unmarshal(data, &dr) if err != nil { return nil, errors.Wrap(err, "an error occurred unmarshaling the discovery response json") } for _, container := range dr.Containers { labels := model.LabelSet{ tritonLabelMachineID: model.LabelValue(container.VMUUID), tritonLabelMachineAlias: model.LabelValue(container.VMAlias), tritonLabelMachineBrand: model.LabelValue(container.VMBrand), tritonLabelMachineImage: 
model.LabelValue(container.VMImageUUID), tritonLabelServerID: model.LabelValue(container.ServerUUID), } addr := fmt.Sprintf("%s.%s:%d", container.VMUUID, d.sdConfig.DNSSuffix, d.sdConfig.Port) labels[model.AddressLabel] = model.LabelValue(addr) if len(container.Groups) > 0 { name := "," + strings.Join(container.Groups, ",") + "," labels[tritonLabelGroups] = model.LabelValue(name) } tg.Targets = append(tg.Targets, labels) } return []*targetgroup.Group{tg}, nil } prometheus-2.15.2+ds/discovery/triton/triton_test.go000066400000000000000000000124111360540074000226270ustar00rootroot00000000000000// Copyright 2016 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package triton import ( "context" "fmt" "net" "net/http" "net/http/httptest" "net/url" "strconv" "strings" "testing" "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/util/testutil" ) var ( conf = SDConfig{ Account: "testAccount", DNSSuffix: "triton.example.com", Endpoint: "127.0.0.1", Port: 443, Version: 1, RefreshInterval: 1, TLSConfig: config.TLSConfig{InsecureSkipVerify: true}, } badconf = SDConfig{ Account: "badTestAccount", DNSSuffix: "bad.triton.example.com", Endpoint: "127.0.0.1", Port: 443, Version: 1, RefreshInterval: 1, TLSConfig: config.TLSConfig{ InsecureSkipVerify: false, KeyFile: "shouldnotexist.key", CAFile: "shouldnotexist.ca", CertFile: "shouldnotexist.cert", }, } groupsconf = SDConfig{ Account: "testAccount", DNSSuffix: "triton.example.com", Endpoint: "127.0.0.1", Groups: []string{"foo", "bar"}, Port: 443, Version: 1, RefreshInterval: 1, TLSConfig: config.TLSConfig{InsecureSkipVerify: true}, } ) func newTritonDiscovery(c SDConfig) (*Discovery, error) { return New(nil, &c) } func TestTritonSDNew(t *testing.T) { td, err := newTritonDiscovery(conf) testutil.Ok(t, err) testutil.Assert(t, td != nil, "") testutil.Assert(t, td.client != nil, "") testutil.Assert(t, td.interval != 0, "") testutil.Assert(t, td.sdConfig != nil, "") testutil.Equals(t, conf.Account, td.sdConfig.Account) testutil.Equals(t, conf.DNSSuffix, td.sdConfig.DNSSuffix) testutil.Equals(t, conf.Endpoint, td.sdConfig.Endpoint) testutil.Equals(t, conf.Port, td.sdConfig.Port) } func TestTritonSDNewBadConfig(t *testing.T) { td, err := newTritonDiscovery(badconf) testutil.NotOk(t, err) testutil.Assert(t, td == nil, "") } func TestTritonSDNewGroupsConfig(t *testing.T) { td, err := newTritonDiscovery(groupsconf) testutil.Ok(t, err) testutil.Assert(t, td != nil, "") testutil.Assert(t, td.client != nil, "") testutil.Assert(t, td.interval != 0, "") testutil.Assert(t, td.sdConfig != nil, "") testutil.Equals(t, groupsconf.Account, td.sdConfig.Account) testutil.Equals(t, groupsconf.DNSSuffix, td.sdConfig.DNSSuffix) testutil.Equals(t, groupsconf.Endpoint, td.sdConfig.Endpoint) testutil.Equals(t, groupsconf.Groups, td.sdConfig.Groups) testutil.Equals(t, groupsconf.Port, td.sdConfig.Port) } func TestTritonSDRefreshNoTargets(t *testing.T) { tgts := testTritonSDRefresh(t, "{\"containers\":[]}") 
testutil.Assert(t, tgts == nil, "") } func TestTritonSDRefreshMultipleTargets(t *testing.T) { var ( dstr = `{"containers":[ { "groups":["foo","bar","baz"], "server_uuid":"44454c4c-5000-104d-8037-b7c04f5a5131", "vm_alias":"server01", "vm_brand":"lx", "vm_image_uuid":"7b27a514-89d7-11e6-bee6-3f96f367bee7", "vm_uuid":"ad466fbf-46a2-4027-9b64-8d3cdb7e9072" }, { "server_uuid":"a5894692-bd32-4ca1-908a-e2dda3c3a5e6", "vm_alias":"server02", "vm_brand":"kvm", "vm_image_uuid":"a5894692-bd32-4ca1-908a-e2dda3c3a5e6", "vm_uuid":"7b27a514-89d7-11e6-bee6-3f96f367bee7" }] }` ) tgts := testTritonSDRefresh(t, dstr) testutil.Assert(t, tgts != nil, "") testutil.Equals(t, 2, len(tgts)) } func TestTritonSDRefreshNoServer(t *testing.T) { var ( td, _ = newTritonDiscovery(conf) ) _, err := td.refresh(context.Background()) testutil.NotOk(t, err) testutil.Equals(t, strings.Contains(err.Error(), "an error occurred when requesting targets from the discovery endpoint"), true) } func TestTritonSDRefreshCancelled(t *testing.T) { var ( td, _ = newTritonDiscovery(conf) ) ctx, cancel := context.WithCancel(context.Background()) cancel() _, err := td.refresh(ctx) testutil.NotOk(t, err) testutil.Equals(t, strings.Contains(err.Error(), context.Canceled.Error()), true) } func testTritonSDRefresh(t *testing.T, dstr string) []model.LabelSet { var ( td, _ = newTritonDiscovery(conf) s = httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { fmt.Fprintln(w, dstr) })) ) defer s.Close() u, err := url.Parse(s.URL) testutil.Ok(t, err) testutil.Assert(t, u != nil, "") host, strport, err := net.SplitHostPort(u.Host) testutil.Ok(t, err) testutil.Assert(t, host != "", "") testutil.Assert(t, strport != "", "") port, err := strconv.Atoi(strport) testutil.Ok(t, err) testutil.Assert(t, port != 0, "") td.sdConfig.Port = port tgs, err := td.refresh(context.Background()) testutil.Ok(t, err) testutil.Equals(t, 1, len(tgs)) tg := tgs[0] testutil.Assert(t, tg != nil, "") return tg.Targets } prometheus-2.15.2+ds/discovery/zookeeper/000077500000000000000000000000001360540074000204075ustar00rootroot00000000000000prometheus-2.15.2+ds/discovery/zookeeper/zookeeper.go000066400000000000000000000213451360540074000227460ustar00rootroot00000000000000// Copyright 2015 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package zookeeper import ( "context" "encoding/json" "fmt" "net" "strconv" "strings" "time" "github.com/go-kit/kit/log" "github.com/pkg/errors" "github.com/prometheus/common/model" "github.com/samuel/go-zookeeper/zk" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/util/strutil" "github.com/prometheus/prometheus/util/treecache" ) var ( // DefaultServersetSDConfig is the default Serverset SD configuration. DefaultServersetSDConfig = ServersetSDConfig{ Timeout: model.Duration(10 * time.Second), } // DefaultNerveSDConfig is the default Nerve SD configuration. 
DefaultNerveSDConfig = NerveSDConfig{ Timeout: model.Duration(10 * time.Second), } ) // ServersetSDConfig is the configuration for Twitter serversets in Zookeeper based discovery. type ServersetSDConfig struct { Servers []string `yaml:"servers"` Paths []string `yaml:"paths"` Timeout model.Duration `yaml:"timeout,omitempty"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *ServersetSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = DefaultServersetSDConfig type plain ServersetSDConfig err := unmarshal((*plain)(c)) if err != nil { return err } if len(c.Servers) == 0 { return errors.New("serverset SD config must contain at least one Zookeeper server") } if len(c.Paths) == 0 { return errors.New("serverset SD config must contain at least one path") } for _, path := range c.Paths { if !strings.HasPrefix(path, "/") { return errors.Errorf("serverset SD config paths must begin with '/': %s", path) } } return nil } // NerveSDConfig is the configuration for AirBnB's Nerve in Zookeeper based discovery. type NerveSDConfig struct { Servers []string `yaml:"servers"` Paths []string `yaml:"paths"` Timeout model.Duration `yaml:"timeout,omitempty"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (c *NerveSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { *c = DefaultNerveSDConfig type plain NerveSDConfig err := unmarshal((*plain)(c)) if err != nil { return err } if len(c.Servers) == 0 { return errors.New("nerve SD config must contain at least one Zookeeper server") } if len(c.Paths) == 0 { return errors.New("nerve SD config must contain at least one path") } for _, path := range c.Paths { if !strings.HasPrefix(path, "/") { return errors.Errorf("nerve SD config paths must begin with '/': %s", path) } } return nil } // Discovery implements the Discoverer interface for discovering // targets from Zookeeper. type Discovery struct { conn *zk.Conn sources map[string]*targetgroup.Group updates chan treecache.ZookeeperTreeCacheEvent pathUpdates []chan treecache.ZookeeperTreeCacheEvent treeCaches []*treecache.ZookeeperTreeCache parse func(data []byte, path string) (model.LabelSet, error) logger log.Logger } // NewNerveDiscovery returns a new Discovery for the given Nerve config. func NewNerveDiscovery(conf *NerveSDConfig, logger log.Logger) (*Discovery, error) { return NewDiscovery(conf.Servers, time.Duration(conf.Timeout), conf.Paths, logger, parseNerveMember) } // NewServersetDiscovery returns a new Discovery for the given serverset config. func NewServersetDiscovery(conf *ServersetSDConfig, logger log.Logger) (*Discovery, error) { return NewDiscovery(conf.Servers, time.Duration(conf.Timeout), conf.Paths, logger, parseServersetMember) } // NewDiscovery returns a new discovery along Zookeeper parses with // the given parse function. 
func NewDiscovery( srvs []string, timeout time.Duration, paths []string, logger log.Logger, pf func(data []byte, path string) (model.LabelSet, error), ) (*Discovery, error) { if logger == nil { logger = log.NewNopLogger() } conn, _, err := zk.Connect( srvs, timeout, func(c *zk.Conn) { c.SetLogger(treecache.NewZookeeperLogger(logger)) }) if err != nil { return nil, err } updates := make(chan treecache.ZookeeperTreeCacheEvent) sd := &Discovery{ conn: conn, updates: updates, sources: map[string]*targetgroup.Group{}, parse: pf, logger: logger, } for _, path := range paths { pathUpdate := make(chan treecache.ZookeeperTreeCacheEvent) sd.pathUpdates = append(sd.pathUpdates, pathUpdate) sd.treeCaches = append(sd.treeCaches, treecache.NewZookeeperTreeCache(conn, path, pathUpdate, logger)) } return sd, nil } // Run implements the Discoverer interface. func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { defer func() { for _, tc := range d.treeCaches { tc.Stop() } for _, pathUpdate := range d.pathUpdates { // Drain event channel in case the treecache leaks goroutines otherwise. for range pathUpdate { } } d.conn.Close() }() for _, pathUpdate := range d.pathUpdates { go func(update chan treecache.ZookeeperTreeCacheEvent) { for event := range update { select { case d.updates <- event: case <-ctx.Done(): return } } }(pathUpdate) } for { select { case <-ctx.Done(): return case event := <-d.updates: tg := &targetgroup.Group{ Source: event.Path, } if event.Data != nil { labelSet, err := d.parse(*event.Data, event.Path) if err == nil { tg.Targets = []model.LabelSet{labelSet} d.sources[event.Path] = tg } else { delete(d.sources, event.Path) } } else { delete(d.sources, event.Path) } select { case <-ctx.Done(): return case ch <- []*targetgroup.Group{tg}: } } } } const ( serversetLabelPrefix = model.MetaLabelPrefix + "serverset_" serversetStatusLabel = serversetLabelPrefix + "status" serversetPathLabel = serversetLabelPrefix + "path" serversetEndpointLabelPrefix = serversetLabelPrefix + "endpoint" serversetShardLabel = serversetLabelPrefix + "shard" ) type serversetMember struct { ServiceEndpoint serversetEndpoint AdditionalEndpoints map[string]serversetEndpoint Status string `json:"status"` Shard int `json:"shard"` } type serversetEndpoint struct { Host string Port int } func parseServersetMember(data []byte, path string) (model.LabelSet, error) { member := serversetMember{} if err := json.Unmarshal(data, &member); err != nil { return nil, errors.Wrapf(err, "error unmarshaling serverset member %q", path) } labels := model.LabelSet{} labels[serversetPathLabel] = model.LabelValue(path) labels[model.AddressLabel] = model.LabelValue( net.JoinHostPort(member.ServiceEndpoint.Host, fmt.Sprintf("%d", member.ServiceEndpoint.Port))) labels[serversetEndpointLabelPrefix+"_host"] = model.LabelValue(member.ServiceEndpoint.Host) labels[serversetEndpointLabelPrefix+"_port"] = model.LabelValue(fmt.Sprintf("%d", member.ServiceEndpoint.Port)) for name, endpoint := range member.AdditionalEndpoints { cleanName := model.LabelName(strutil.SanitizeLabelName(name)) labels[serversetEndpointLabelPrefix+"_host_"+cleanName] = model.LabelValue( endpoint.Host) labels[serversetEndpointLabelPrefix+"_port_"+cleanName] = model.LabelValue( fmt.Sprintf("%d", endpoint.Port)) } labels[serversetStatusLabel] = model.LabelValue(member.Status) labels[serversetShardLabel] = model.LabelValue(strconv.Itoa(member.Shard)) return labels, nil } const ( nerveLabelPrefix = model.MetaLabelPrefix + "nerve_" nervePathLabel = 
nerveLabelPrefix + "path" nerveEndpointLabelPrefix = nerveLabelPrefix + "endpoint" ) type nerveMember struct { Host string `json:"host"` Port int `json:"port"` Name string `json:"name"` } func parseNerveMember(data []byte, path string) (model.LabelSet, error) { member := nerveMember{} err := json.Unmarshal(data, &member) if err != nil { return nil, errors.Wrapf(err, "error unmarshaling nerve member %q", path) } labels := model.LabelSet{} labels[nervePathLabel] = model.LabelValue(path) labels[model.AddressLabel] = model.LabelValue( net.JoinHostPort(member.Host, fmt.Sprintf("%d", member.Port))) labels[nerveEndpointLabelPrefix+"_host"] = model.LabelValue(member.Host) labels[nerveEndpointLabelPrefix+"_port"] = model.LabelValue(fmt.Sprintf("%d", member.Port)) labels[nerveEndpointLabelPrefix+"_name"] = model.LabelValue(member.Name) return labels, nil } prometheus-2.15.2+ds/discovery/zookeeper/zookeeper_test.go000066400000000000000000000016751360540074000240110ustar00rootroot00000000000000// Copyright 2018 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package zookeeper import ( "testing" "time" "github.com/prometheus/common/model" ) func TestNewDiscoveryError(t *testing.T) { _, err := NewDiscovery( []string{"unreachable.test"}, time.Second, []string{"/"}, nil, func(data []byte, path string) (model.LabelSet, error) { return nil, nil }) if err == nil { t.Fatalf("expected error, got nil") } } prometheus-2.15.2+ds/docs/000077500000000000000000000000001360540074000153255ustar00rootroot00000000000000prometheus-2.15.2+ds/docs/configuration/000077500000000000000000000000001360540074000201745ustar00rootroot00000000000000prometheus-2.15.2+ds/docs/configuration/alerting_rules.md000066400000000000000000000102441360540074000235360ustar00rootroot00000000000000--- title: Alerting rules sort_rank: 3 --- # Alerting rules Alerting rules allow you to define alert conditions based on Prometheus expression language expressions and to send notifications about firing alerts to an external service. Whenever the alert expression results in one or more vector elements at a given point in time, the alert counts as active for these elements' label sets. ### Defining alerting rules Alerting rules are configured in Prometheus in the same way as [recording rules](recording_rules.md). An example rules file with an alert would be: ```yaml groups: - name: example rules: - alert: HighRequestLatency expr: job:request_latency_seconds:mean5m{job="myjob"} > 0.5 for: 10m labels: severity: page annotations: summary: High request latency ``` The optional `for` clause causes Prometheus to wait for a certain duration between first encountering a new expression output vector element and counting an alert as firing for this element. In this case, Prometheus will check that the alert continues to be active during each evaluation for 10 minutes before firing the alert. Elements that are active, but not firing yet, are in the pending state. 
The `labels` clause allows specifying a set of additional labels to be attached to the alert. Any existing conflicting labels will be overwritten. The label values can be templated. The `annotations` clause specifies a set of informational labels that can be used to store longer additional information such as alert descriptions or runbook links. The annotation values can be templated. #### Templating Label and annotation values can be templated using [console templates](https://prometheus.io/docs/visualization/consoles). The `$labels` variable holds the label key/value pairs of an alert instance. The configured external labels can be accessed via the `$externalLabels` variable. The `$value` variable holds the evaluated value of an alert instance. # To insert a firing element's label values: {{ $labels. }} # To insert the numeric expression value of the firing element: {{ $value }} Examples: ```yaml groups: - name: example rules: # Alert for any instance that is unreachable for >5 minutes. - alert: InstanceDown expr: up == 0 for: 5m labels: severity: page annotations: summary: "Instance {{ $labels.instance }} down" description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes." # Alert for any instance that has a median request latency >1s. - alert: APIHighRequestLatency expr: api_http_request_latencies_second{quantile="0.5"} > 1 for: 10m annotations: summary: "High request latency on {{ $labels.instance }}" description: "{{ $labels.instance }} has a median request latency above 1s (current value: {{ $value }}s)" ``` ### Inspecting alerts during runtime To manually inspect which alerts are active (pending or firing), navigate to the "Alerts" tab of your Prometheus instance. This will show you the exact label sets for which each defined alert is currently active. For pending and firing alerts, Prometheus also stores synthetic time series of the form `ALERTS{alertname="", alertstate="pending|firing", }`. The sample value is set to `1` as long as the alert is in the indicated active (pending or firing) state, and the series is marked stale when this is no longer the case. ### Sending alert notifications Prometheus's alerting rules are good at figuring what is broken *right now*, but they are not a fully-fledged notification solution. Another layer is needed to add summarization, notification rate limiting, silencing and alert dependencies on top of the simple alert definitions. In Prometheus's ecosystem, the [Alertmanager](https://prometheus.io/docs/alerting/alertmanager/) takes on this role. Thus, Prometheus may be configured to periodically send information about alert states to an Alertmanager instance, which then takes care of dispatching the right notifications. Prometheus can be [configured](configuration.md) to automatically discover available Alertmanager instances through its service discovery integrations. prometheus-2.15.2+ds/docs/configuration/configuration.md000066400000000000000000001511121360540074000233660ustar00rootroot00000000000000--- title: Configuration sort_rank: 1 --- # Configuration Prometheus is configured via command-line flags and a configuration file. While the command-line flags configure immutable system parameters (such as storage locations, amount of data to keep on disk and in memory, etc.), the configuration file defines everything related to scraping [jobs and their instances](https://prometheus.io/docs/concepts/jobs_instances/), as well as which [rule files to load](recording_rules.md#configuring-rules). 
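A minimal sketch of such a file, scraping a single statically configured target (the job name and target address are placeholders; a complete, valid example file is linked further below):

```yaml
global:
  scrape_interval: 15s

scrape_configs:
- job_name: 'example'
  static_configs:
  - targets: ['localhost:9090']
```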
To view all available command-line flags, run `./prometheus -h`. Prometheus can reload its configuration at runtime. If the new configuration is not well-formed, the changes will not be applied. A configuration reload is triggered by sending a `SIGHUP` to the Prometheus process or sending a HTTP POST request to the `/-/reload` endpoint (when the `--web.enable-lifecycle` flag is enabled). This will also reload any configured rule files. ## Configuration file To specify which configuration file to load, use the `--config.file` flag. The file is written in [YAML format](https://en.wikipedia.org/wiki/YAML), defined by the scheme described below. Brackets indicate that a parameter is optional. For non-list parameters the value is set to the specified default. Generic placeholders are defined as follows: * ``: a boolean that can take the values `true` or `false` * ``: a duration matching the regular expression `[0-9]+(ms|[smhdwy])` * ``: a string matching the regular expression `[a-zA-Z_][a-zA-Z0-9_]*` * ``: a string of unicode characters * ``: a valid path in the current working directory * ``: a valid string consisting of a hostname or IP followed by an optional port number * ``: a valid URL path * ``: a string that can take the values `http` or `https` * ``: a regular string * ``: a regular string that is a secret, such as a password * ``: a string which is template-expanded before usage The other placeholders are specified separately. A valid example file can be found [here](/config/testdata/conf.good.yml). The global configuration specifies parameters that are valid in all other configuration contexts. They also serve as defaults for other configuration sections. ```yaml global: # How frequently to scrape targets by default. [ scrape_interval: | default = 1m ] # How long until a scrape request times out. [ scrape_timeout: | default = 10s ] # How frequently to evaluate rules. [ evaluation_interval: | default = 1m ] # The labels to add to any time series or alerts when communicating with # external systems (federation, remote storage, Alertmanager). external_labels: [ : ... ] # Rule files specifies a list of globs. Rules and alerts are read from # all matching files. rule_files: [ - ... ] # A list of scrape configurations. scrape_configs: [ - ... ] # Alerting specifies settings related to the Alertmanager. alerting: alert_relabel_configs: [ - ... ] alertmanagers: [ - ... ] # Settings related to the remote write feature. remote_write: [ - ... ] # Settings related to the remote read feature. remote_read: [ - ... ] ``` ### `` A `scrape_config` section specifies a set of targets and parameters describing how to scrape them. In the general case, one scrape configuration specifies a single job. In advanced configurations, this may change. Targets may be statically configured via the `static_configs` parameter or dynamically discovered using one of the supported service-discovery mechanisms. Additionally, `relabel_configs` allow advanced modifications to any target and its labels before scraping. ```yaml # The job name assigned to scraped metrics by default. job_name: # How frequently to scrape targets from this job. [ scrape_interval: | default = ] # Per-scrape timeout when scraping this job. [ scrape_timeout: | default = ] # The HTTP resource path on which to fetch metrics from targets. 
[ metrics_path: | default = /metrics ] # honor_labels controls how Prometheus handles conflicts between labels that are # already present in scraped data and labels that Prometheus would attach # server-side ("job" and "instance" labels, manually configured target # labels, and labels generated by service discovery implementations). # # If honor_labels is set to "true", label conflicts are resolved by keeping label # values from the scraped data and ignoring the conflicting server-side labels. # # If honor_labels is set to "false", label conflicts are resolved by renaming # conflicting labels in the scraped data to "exported_" (for # example "exported_instance", "exported_job") and then attaching server-side # labels. # # Setting honor_labels to "true" is useful for use cases such as federation and # scraping the Pushgateway, where all labels specified in the target should be # preserved. # # Note that any globally configured "external_labels" are unaffected by this # setting. In communication with external systems, they are always applied only # when a time series does not have a given label yet and are ignored otherwise. [ honor_labels: | default = false ] # honor_timestamps controls whether Prometheus respects the timestamps present # in scraped data. # # If honor_timestamps is set to "true", the timestamps of the metrics exposed # by the target will be used. # # If honor_timestamps is set to "false", the timestamps of the metrics exposed # by the target will be ignored. [ honor_timestamps: | default = true ] # Configures the protocol scheme used for requests. [ scheme: | default = http ] # Optional HTTP URL parameters. params: [ : [, ...] ] # Sets the `Authorization` header on every scrape request with the # configured username and password. # password and password_file are mutually exclusive. basic_auth: [ username: ] [ password: ] [ password_file: ] # Sets the `Authorization` header on every scrape request with # the configured bearer token. It is mutually exclusive with `bearer_token_file`. [ bearer_token: ] # Sets the `Authorization` header on every scrape request with the bearer token # read from the configured file. It is mutually exclusive with `bearer_token`. [ bearer_token_file: /path/to/bearer/token/file ] # Configures the scrape request's TLS settings. tls_config: [ ] # Optional proxy URL. [ proxy_url: ] # List of Azure service discovery configurations. azure_sd_configs: [ - ... ] # List of Consul service discovery configurations. consul_sd_configs: [ - ... ] # List of DNS service discovery configurations. dns_sd_configs: [ - ... ] # List of EC2 service discovery configurations. ec2_sd_configs: [ - ... ] # List of OpenStack service discovery configurations. openstack_sd_configs: [ - ... ] # List of file service discovery configurations. file_sd_configs: [ - ... ] # List of GCE service discovery configurations. gce_sd_configs: [ - ... ] # List of Kubernetes service discovery configurations. kubernetes_sd_configs: [ - ... ] # List of Marathon service discovery configurations. marathon_sd_configs: [ - ... ] # List of AirBnB's Nerve service discovery configurations. nerve_sd_configs: [ - ... ] # List of Zookeeper Serverset service discovery configurations. serverset_sd_configs: [ - ... ] # List of Triton service discovery configurations. triton_sd_configs: [ - ... ] # List of labeled statically configured targets for this job. static_configs: [ - ... ] # List of target relabel configurations. relabel_configs: [ - ... ] # List of metric relabel configurations. 
metric_relabel_configs: [ - ... ] # Per-scrape limit on number of scraped samples that will be accepted. # If more than this number of samples are present after metric relabelling # the entire scrape will be treated as failed. 0 means no limit. [ sample_limit: | default = 0 ] ``` Where `` must be unique across all scrape configurations. ### `` A `tls_config` allows configuring TLS connections. ```yaml # CA certificate to validate API server certificate with. [ ca_file: ] # Certificate and key files for client cert authentication to the server. [ cert_file: ] [ key_file: ] # ServerName extension to indicate the name of the server. # https://tools.ietf.org/html/rfc4366#section-3.1 [ server_name: ] # Disable validation of the server certificate. [ insecure_skip_verify: ] ``` ### `` Azure SD configurations allow retrieving scrape targets from Azure VMs. The following meta labels are available on targets during relabeling: * `__meta_azure_machine_id`: the machine ID * `__meta_azure_machine_location`: the location the machine runs in * `__meta_azure_machine_name`: the machine name * `__meta_azure_machine_os_type`: the machine operating system * `__meta_azure_machine_private_ip`: the machine's private IP * `__meta_azure_machine_public_ip`: the machine's public IP if it exists * `__meta_azure_machine_resource_group`: the machine's resource group * `__meta_azure_machine_tag_`: each tag value of the machine * `__meta_azure_machine_scale_set`: the name of the scale set which the vm is part of (this value is only set if you are using a [scale set](https://docs.microsoft.com/en-us/azure/virtual-machine-scale-sets/)) * `__meta_azure_subscription_id`: the subscription ID * `__meta_azure_tenant_id`: the tenant ID See below for the configuration options for Azure discovery: ```yaml # The information to access the Azure API. # The Azure environment. [ environment: | default = AzurePublicCloud ] # The authentication method, either OAuth or ManagedIdentity. # See https://docs.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/overview [ authentication_method: | default = OAuth] # The subscription ID. Always required. subscription_id: # Optional tenant ID. Only required with authentication_method OAuth. [ tenant_id: ] # Optional client ID. Only required with authentication_method OAuth. [ client_id: ] # Optional client secret. Only required with authentication_method OAuth. [ client_secret: ] # Refresh interval to re-read the instance list. [ refresh_interval: | default = 300s ] # The port to scrape metrics from. If using the public IP address, this must # instead be specified in the relabeling rule. [ port: | default = 80 ] ``` ### `` Consul SD configurations allow retrieving scrape targets from [Consul's](https://www.consul.io) Catalog API. 
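Before the reference documentation below, a minimal sketch of a scrape configuration that uses Consul discovery (the server address and the service name `web` are placeholders) and copies the discovered service name into the `job` label:

```yaml
scrape_configs:
- job_name: 'consul-services'
  consul_sd_configs:
  - server: 'localhost:8500'
    services: ['web']
  relabel_configs:
  # Copy the Consul service name into the job label.
  - source_labels: [__meta_consul_service]
    target_label: job
```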
The following meta labels are available on targets during [relabeling](#relabel_config): * `__meta_consul_address`: the address of the target * `__meta_consul_dc`: the datacenter name for the target * `__meta_consul_tagged_address_`: each node tagged address key value of the target * `__meta_consul_metadata_`: each node metadata key value of the target * `__meta_consul_node`: the node name defined for the target * `__meta_consul_service_address`: the service address of the target * `__meta_consul_service_id`: the service ID of the target * `__meta_consul_service_metadata_`: each service metadata key value of the target * `__meta_consul_service_port`: the service port of the target * `__meta_consul_service`: the name of the service the target belongs to * `__meta_consul_tags`: the list of tags of the target joined by the tag separator ```yaml # The information to access the Consul API. It is to be defined # as the Consul documentation requires. [ server: | default = "localhost:8500" ] [ token: ] [ datacenter: ] [ scheme: | default = "http" ] [ username: ] [ password: ] tls_config: [ ] # A list of services for which targets are retrieved. If omitted, all services # are scraped. services: [ - ] # See https://www.consul.io/api/catalog.html#list-nodes-for-service to know more # about the possible filters that can be used. # An optional list of tags used to filter nodes for a given service. Services must contain all tags in the list. tags: [ - ] # Node metadata used to filter nodes for a given service. [ node_meta: [ : ... ] ] # The string by which Consul tags are joined into the tag label. [ tag_separator: | default = , ] # Allow stale Consul results (see https://www.consul.io/api/features/consistency.html). Will reduce load on Consul. [ allow_stale: ] # The time after which the provided names are refreshed. # On large setup it might be a good idea to increase this value because the catalog will change all the time. [ refresh_interval: | default = 30s ] ``` Note that the IP number and port used to scrape the targets is assembled as `<__meta_consul_address>:<__meta_consul_service_port>`. However, in some Consul setups, the relevant address is in `__meta_consul_service_address`. In those cases, you can use the [relabel](#relabel_config) feature to replace the special `__address__` label. The [relabeling phase](#relabel_config) is the preferred and more powerful way to filter services or nodes for a service based on arbitrary labels. For users with thousands of services it can be more efficient to use the Consul API directly which has basic support for filtering nodes (currently by node metadata and a single tag). ### `` A DNS-based service discovery configuration allows specifying a set of DNS domain names which are periodically queried to discover a list of targets. The DNS servers to be contacted are read from `/etc/resolv.conf`. This service discovery method only supports basic DNS A, AAAA and SRV record queries, but not the advanced DNS-SD approach specified in [RFC6763](https://tools.ietf.org/html/rfc6763). During the [relabeling phase](#relabel_config), the meta label `__meta_dns_name` is available on each target and is set to the record name that produced the discovered target. ```yaml # A list of DNS domain names to be queried. names: [ - ] # The type of DNS query to perform. [ type: | default = 'SRV' ] # The port number used if the query type is not SRV. [ port: ] # The time after which the provided names are refreshed. 
[ refresh_interval: | default = 30s ] ``` Where `` is a valid DNS domain name. Where `` is `SRV`, `A`, or `AAAA`. ### `` EC2 SD configurations allow retrieving scrape targets from AWS EC2 instances. The private IP address is used by default, but may be changed to the public IP address with relabeling. The following meta labels are available on targets during [relabeling](#relabel_config): * `__meta_ec2_availability_zone`: the availability zone in which the instance is running * `__meta_ec2_instance_id`: the EC2 instance ID * `__meta_ec2_instance_state`: the state of the EC2 instance * `__meta_ec2_instance_type`: the type of the EC2 instance * `__meta_ec2_owner_id`: the ID of the AWS account that owns the EC2 instance * `__meta_ec2_platform`: the Operating System platform, set to 'windows' on Windows servers, absent otherwise * `__meta_ec2_primary_subnet_id`: the subnet ID of the primary network interface, if available * `__meta_ec2_private_dns_name`: the private DNS name of the instance, if available * `__meta_ec2_private_ip`: the private IP address of the instance, if present * `__meta_ec2_public_dns_name`: the public DNS name of the instance, if available * `__meta_ec2_public_ip`: the public IP address of the instance, if available * `__meta_ec2_subnet_id`: comma separated list of subnets IDs in which the instance is running, if available * `__meta_ec2_tag_`: each tag value of the instance * `__meta_ec2_vpc_id`: the ID of the VPC in which the instance is running, if available See below for the configuration options for EC2 discovery: ```yaml # The information to access the EC2 API. # The AWS region. If blank, the region from the instance metadata is used. [ region: ] # Custom endpoint to be used. [ endpoint: ] # The AWS API keys. If blank, the environment variables `AWS_ACCESS_KEY_ID` # and `AWS_SECRET_ACCESS_KEY` are used. [ access_key: ] [ secret_key: ] # Named AWS profile used to connect to the API. [ profile: ] # AWS Role ARN, an alternative to using AWS API keys. [ role_arn: ] # Refresh interval to re-read the instance list. [ refresh_interval: | default = 60s ] # The port to scrape metrics from. If using the public IP address, this must # instead be specified in the relabeling rule. [ port: | default = 80 ] # Filters can be used optionally to filter the instance list by other criteria. # Available filter criteria can be found here: # https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeInstances.html # Filter API documentation: https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_Filter.html filters: [ - name: values: , [...] ] ``` The [relabeling phase](#relabel_config) is the preferred and more powerful way to filter targets based on arbitrary labels. For users with thousands of instances it can be more efficient to use the EC2 API directly which has support for filtering instances. ### `` OpenStack SD configurations allow retrieving scrape targets from OpenStack Nova instances. One of the following `` types can be configured to discover targets: #### `hypervisor` The `hypervisor` role discovers one target per Nova hypervisor node. The target address defaults to the `host_ip` attribute of the hypervisor. The following meta labels are available on targets during [relabeling](#relabel_config): * `__meta_openstack_hypervisor_host_ip`: the hypervisor node's IP address. * `__meta_openstack_hypervisor_name`: the hypervisor node's name. * `__meta_openstack_hypervisor_state`: the hypervisor node's state. 
* `__meta_openstack_hypervisor_status`: the hypervisor node's status. * `__meta_openstack_hypervisor_type`: the hypervisor node's type. #### `instance` The `instance` role discovers one target per network interface of Nova instance. The target address defaults to the private IP address of the network interface. The following meta labels are available on targets during [relabeling](#relabel_config): * `__meta_openstack_address_pool`: the pool of the private IP. * `__meta_openstack_instance_flavor`: the flavor of the OpenStack instance. * `__meta_openstack_instance_id`: the OpenStack instance ID. * `__meta_openstack_instance_name`: the OpenStack instance name. * `__meta_openstack_instance_status`: the status of the OpenStack instance. * `__meta_openstack_private_ip`: the private IP of the OpenStack instance. * `__meta_openstack_project_id`: the project (tenant) owning this instance. * `__meta_openstack_public_ip`: the public IP of the OpenStack instance. * `__meta_openstack_tag_`: each tag value of the instance. * `__meta_openstack_user_id`: the user account owning the tenant. See below for the configuration options for OpenStack discovery: ```yaml # The information to access the OpenStack API. # The OpenStack role of entities that should be discovered. role: # The OpenStack Region. region: # identity_endpoint specifies the HTTP endpoint that is required to work with # the Identity API of the appropriate version. While it's ultimately needed by # all of the identity services, it will often be populated by a provider-level # function. [ identity_endpoint: ] # username is required if using Identity V2 API. Consult with your provider's # control panel to discover your account's username. In Identity V3, either # userid or a combination of username and domain_id or domain_name are needed. [ username: ] [ userid: ] # password for the Identity V2 and V3 APIs. Consult with your provider's # control panel to discover your account's preferred method of authentication. [ password: ] # At most one of domain_id and domain_name must be provided if using username # with Identity V3. Otherwise, either are optional. [ domain_name: ] [ domain_id: ] # The project_id and project_name fields are optional for the Identity V2 API. # Some providers allow you to specify a project_name instead of the project_id. # Some require both. Your provider's authentication policies will determine # how these fields influence authentication. [ project_name: ] [ project_id: ] # The application_credential_id or application_credential_name fields are # required if using an application credential to authenticate. Some providers # allow you to create an application credential to authenticate rather than a # password. [ application_credential_name: ] [ application_credential_id: ] # The application_credential_secret field is required if using an application # credential to authenticate. [ application_credential_secret: ] # Whether the service discovery should list all instances for all projects. # It is only relevant for the 'instance' role and usually requires admin permissions. [ all_tenants: | default: false ] # Refresh interval to re-read the instance list. [ refresh_interval: | default = 60s ] # The port to scrape metrics from. If using the public IP address, this must # instead be specified in the relabeling rule. [ port: | default = 80 ] # TLS configuration. 
tls_config: [ ] ``` ### `` File-based service discovery provides a more generic way to configure static targets and serves as an interface to plug in custom service discovery mechanisms. It reads a set of files containing a list of zero or more ``s. Changes to all defined files are detected via disk watches and applied immediately. Files may be provided in YAML or JSON format. Only changes resulting in well-formed target groups are applied. The JSON file must contain a list of static configs, using this format: ```yaml [ { "targets": [ "", ... ], "labels": { "": "", ... } }, ... ] ``` As a fallback, the file contents are also re-read periodically at the specified refresh interval. Each target has a meta label `__meta_filepath` during the [relabeling phase](#relabel_config). Its value is set to the filepath from which the target was extracted. There is a list of [integrations](https://prometheus.io/docs/operating/integrations/#file-service-discovery) with this discovery mechanism. ```yaml # Patterns for files from which target groups are extracted. files: [ - ... ] # Refresh interval to re-read the files. [ refresh_interval: | default = 5m ] ``` Where `` may be a path ending in `.json`, `.yml` or `.yaml`. The last path segment may contain a single `*` that matches any character sequence, e.g. `my/path/tg_*.json`. ### `` [GCE](https://cloud.google.com/compute/) SD configurations allow retrieving scrape targets from GCP GCE instances. The private IP address is used by default, but may be changed to the public IP address with relabeling. The following meta labels are available on targets during [relabeling](#relabel_config): * `__meta_gce_instance_id`: the numeric id of the instance * `__meta_gce_instance_name`: the name of the instance * `__meta_gce_label_`: each GCE label of the instance * `__meta_gce_machine_type`: full or partial URL of the machine type of the instance * `__meta_gce_metadata_`: each metadata item of the instance * `__meta_gce_network`: the network URL of the instance * `__meta_gce_private_ip`: the private IP address of the instance * `__meta_gce_project`: the GCP project in which the instance is running * `__meta_gce_public_ip`: the public IP address of the instance, if present * `__meta_gce_subnetwork`: the subnetwork URL of the instance * `__meta_gce_tags`: comma separated list of instance tags * `__meta_gce_zone`: the GCE zone URL in which the instance is running See below for the configuration options for GCE discovery: ```yaml # The information to access the GCE API. # The GCP Project project: # The zone of the scrape targets. If you need multiple zones use multiple # gce_sd_configs. zone: # Filter can be used optionally to filter the instance list by other criteria # Syntax of this filter string is described here in the filter query parameter section: # https://cloud.google.com/compute/docs/reference/latest/instances/list [ filter: ] # Refresh interval to re-read the instance list [ refresh_interval: | default = 60s ] # The port to scrape metrics from. If using the public IP address, this must # instead be specified in the relabeling rule. [ port: | default = 80 ] # The tag separator is used to separate the tags on concatenation [ tag_separator: | default = , ] ``` Credentials are discovered by the Google Cloud SDK default client by looking in the following places, preferring the first location found: 1. a JSON file specified by the `GOOGLE_APPLICATION_CREDENTIALS` environment variable 2. 
a JSON file in the well-known path `$HOME/.config/gcloud/application_default_credentials.json` 3. fetched from the GCE metadata server If Prometheus is running within GCE, the service account associated with the instance it is running on should have at least read-only permissions to the compute resources. If running outside of GCE make sure to create an appropriate service account and place the credential file in one of the expected locations. ### `` Kubernetes SD configurations allow retrieving scrape targets from [Kubernetes'](https://kubernetes.io/) REST API and always staying synchronized with the cluster state. One of the following `role` types can be configured to discover targets: #### `node` The `node` role discovers one target per cluster node with the address defaulting to the Kubelet's HTTP port. The target address defaults to the first existing address of the Kubernetes node object in the address type order of `NodeInternalIP`, `NodeExternalIP`, `NodeLegacyHostIP`, and `NodeHostName`. Available meta labels: * `__meta_kubernetes_node_name`: The name of the node object. * `__meta_kubernetes_node_label_`: Each label from the node object. * `__meta_kubernetes_node_labelpresent_`: `true` for each label from the node object. * `__meta_kubernetes_node_annotation_`: Each annotation from the node object. * `__meta_kubernetes_node_annotationpresent_`: `true` for each annotation from the node object. * `__meta_kubernetes_node_address_`: The first address for each node address type, if it exists. In addition, the `instance` label for the node will be set to the node name as retrieved from the API server. #### `service` The `service` role discovers a target for each service port for each service. This is generally useful for blackbox monitoring of a service. The address will be set to the Kubernetes DNS name of the service and respective service port. Available meta labels: * `__meta_kubernetes_namespace`: The namespace of the service object. * `__meta_kubernetes_service_annotation_`: Each annotation from the service object. * `__meta_kubernetes_service_annotationpresent_`: "true" for each annotation of the service object. * `__meta_kubernetes_service_cluster_ip`: The cluster IP address of the service. (Does not apply to services of type ExternalName) * `__meta_kubernetes_service_external_name`: The DNS name of the service. (Applies to services of type ExternalName) * `__meta_kubernetes_service_label_`: Each label from the service object. * `__meta_kubernetes_service_labelpresent_`: `true` for each label of the service object. * `__meta_kubernetes_service_name`: The name of the service object. * `__meta_kubernetes_service_port_name`: Name of the service port for the target. * `__meta_kubernetes_service_port_protocol`: Protocol of the service port for the target. #### `pod` The `pod` role discovers all pods and exposes their containers as targets. For each declared port of a container, a single target is generated. If a container has no specified ports, a port-free target per container is created for manually adding a port via relabeling. Available meta labels: * `__meta_kubernetes_namespace`: The namespace of the pod object. * `__meta_kubernetes_pod_name`: The name of the pod object. * `__meta_kubernetes_pod_ip`: The pod IP of the pod object. * `__meta_kubernetes_pod_label_`: Each label from the pod object. * `__meta_kubernetes_pod_labelpresent_`: `true`for each label from the pod object. * `__meta_kubernetes_pod_annotation_`: Each annotation from the pod object. 
* `__meta_kubernetes_pod_annotationpresent_`: `true` for each annotation from the pod object. * `__meta_kubernetes_pod_container_init`: `true` if the container is an [InitContainer](https://kubernetes.io/docs/concepts/workloads/pods/init-containers/) * `__meta_kubernetes_pod_container_name`: Name of the container the target address points to. * `__meta_kubernetes_pod_container_port_name`: Name of the container port. * `__meta_kubernetes_pod_container_port_number`: Number of the container port. * `__meta_kubernetes_pod_container_port_protocol`: Protocol of the container port. * `__meta_kubernetes_pod_ready`: Set to `true` or `false` for the pod's ready state. * `__meta_kubernetes_pod_phase`: Set to `Pending`, `Running`, `Succeeded`, `Failed` or `Unknown` in the [lifecycle](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase). * `__meta_kubernetes_pod_node_name`: The name of the node the pod is scheduled onto. * `__meta_kubernetes_pod_host_ip`: The current host IP of the pod object. * `__meta_kubernetes_pod_uid`: The UID of the pod object. * `__meta_kubernetes_pod_controller_kind`: Object kind of the pod controller. * `__meta_kubernetes_pod_controller_name`: Name of the pod controller. #### `endpoints` The `endpoints` role discovers targets from listed endpoints of a service. For each endpoint address one target is discovered per port. If the endpoint is backed by a pod, all additional container ports of the pod, not bound to an endpoint port, are discovered as targets as well. Available meta labels: * `__meta_kubernetes_namespace`: The namespace of the endpoints object. * `__meta_kubernetes_endpoints_name`: The names of the endpoints object. * For all targets discovered directly from the endpoints list (those not additionally inferred from underlying pods), the following labels are attached: * `__meta_kubernetes_endpoint_hostname`: Hostname of the endpoint. * `__meta_kubernetes_endpoint_node_name`: Name of the node hosting the endpoint. * `__meta_kubernetes_endpoint_ready`: Set to `true` or `false` for the endpoint's ready state. * `__meta_kubernetes_endpoint_port_name`: Name of the endpoint port. * `__meta_kubernetes_endpoint_port_protocol`: Protocol of the endpoint port. * `__meta_kubernetes_endpoint_address_target_kind`: Kind of the endpoint address target. * `__meta_kubernetes_endpoint_address_target_name`: Name of the endpoint address target. * If the endpoints belong to a service, all labels of the `role: service` discovery are attached. * For all targets backed by a pod, all labels of the `role: pod` discovery are attached. #### `ingress` The `ingress` role discovers a target for each path of each ingress. This is generally useful for blackbox monitoring of an ingress. The address will be set to the host specified in the ingress spec. Available meta labels: * `__meta_kubernetes_namespace`: The namespace of the ingress object. * `__meta_kubernetes_ingress_name`: The name of the ingress object. * `__meta_kubernetes_ingress_label_`: Each label from the ingress object. * `__meta_kubernetes_ingress_labelpresent_`: `true` for each label from the ingress object. * `__meta_kubernetes_ingress_annotation_`: Each annotation from the ingress object. * `__meta_kubernetes_ingress_annotationpresent_`: `true` for each annotation from the ingress object. * `__meta_kubernetes_ingress_scheme`: Protocol scheme of ingress, `https` if TLS config is set. Defaults to `http`. * `__meta_kubernetes_ingress_path`: Path from ingress spec. Defaults to `/`. 
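These meta labels are typically combined with [relabeling](#relabel_config) to select which discovered objects are actually scraped. A minimal sketch for the `pod` role, assuming pods opt in via a `prometheus.io/scrape: "true"` annotation (a common convention, not something Prometheus itself requires):

```yaml
scrape_configs:
- job_name: 'kubernetes-pods'
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  # Keep only pods that carry the opt-in annotation.
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
    action: keep
    regex: "true"
```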
See below for the configuration options for Kubernetes discovery: ```yaml # The information to access the Kubernetes API. # The API server addresses. If left empty, Prometheus is assumed to run inside # of the cluster and will discover API servers automatically and use the pod's # CA certificate and bearer token file at /var/run/secrets/kubernetes.io/serviceaccount/. [ api_server: ] # The Kubernetes role of entities that should be discovered. role: # Optional authentication information used to authenticate to the API server. # Note that `basic_auth`, `bearer_token` and `bearer_token_file` options are # mutually exclusive. # password and password_file are mutually exclusive. # Optional HTTP basic authentication information. basic_auth: [ username: ] [ password: ] [ password_file: ] # Optional bearer token authentication information. [ bearer_token: ] # Optional bearer token file authentication information. [ bearer_token_file: ] # Optional proxy URL. [ proxy_url: ] # TLS configuration. tls_config: [ ] # Optional namespace discovery. If omitted, all namespaces are used. namespaces: names: [ - ] ``` Where `` must be `endpoints`, `service`, `pod`, `node`, or `ingress`. See [this example Prometheus configuration file](/documentation/examples/prometheus-kubernetes.yml) for a detailed example of configuring Prometheus for Kubernetes. You may wish to check out the 3rd party [Prometheus Operator](https://github.com/coreos/prometheus-operator), which automates the Prometheus setup on top of Kubernetes. ### `` Marathon SD configurations allow retrieving scrape targets using the [Marathon](https://mesosphere.github.io/marathon/) REST API. Prometheus will periodically check the REST endpoint for currently running tasks and create a target group for every app that has at least one healthy task. The following meta labels are available on targets during [relabeling](#relabel_config): * `__meta_marathon_app`: the name of the app (with slashes replaced by dashes) * `__meta_marathon_image`: the name of the Docker image used (if available) * `__meta_marathon_task`: the ID of the Mesos task * `__meta_marathon_app_label_`: any Marathon labels attached to the app * `__meta_marathon_port_definition_label_`: the port definition labels * `__meta_marathon_port_mapping_label_`: the port mapping labels * `__meta_marathon_port_index`: the port index number (e.g. `1` for `PORT1`) See below for the configuration options for Marathon discovery: ```yaml # List of URLs to be used to contact Marathon servers. # You need to provide at least one server URL. servers: - # Polling interval [ refresh_interval: | default = 30s ] # Optional authentication information for token-based authentication # https://docs.mesosphere.com/1.11/security/ent/iam-api/#passing-an-authentication-token # It is mutually exclusive with `auth_token_file` and other authentication mechanisms. [ auth_token: ] # Optional authentication information for token-based authentication # https://docs.mesosphere.com/1.11/security/ent/iam-api/#passing-an-authentication-token # It is mutually exclusive with `auth_token` and other authentication mechanisms. [ auth_token_file: ] # Sets the `Authorization` header on every request with the # configured username and password. # This is mutually exclusive with other authentication mechanisms. # password and password_file are mutually exclusive. basic_auth: [ username: ] [ password: ] [ password_file: ] # Sets the `Authorization` header on every request with # the configured bearer token. 
It is mutually exclusive with `bearer_token_file` and other authentication mechanisms. # NOTE: The current version of DC/OS marathon (v1.11.0) does not support standard Bearer token authentication. Use `auth_token` instead. [ bearer_token: ] # Sets the `Authorization` header on every request with the bearer token # read from the configured file. It is mutually exclusive with `bearer_token` and other authentication mechanisms. # NOTE: The current version of DC/OS marathon (v1.11.0) does not support standard Bearer token authentication. Use `auth_token_file` instead. [ bearer_token_file: /path/to/bearer/token/file ] # TLS configuration for connecting to marathon servers tls_config: [ ] # Optional proxy URL. [ proxy_url: ] ``` By default every app listed in Marathon will be scraped by Prometheus. If not all of your services provide Prometheus metrics, you can use a Marathon label and Prometheus relabeling to control which instances will actually be scraped. See [the Prometheus marathon-sd configuration file](/documentation/examples/prometheus-marathon.yml) for a practical example on how to set up your Marathon app and your Prometheus configuration. By default, all apps will show up as a single job in Prometheus (the one specified in the configuration file), which can also be changed using relabeling. ### `` Nerve SD configurations allow retrieving scrape targets from [AirBnB's Nerve] (https://github.com/airbnb/nerve) which are stored in [Zookeeper](https://zookeeper.apache.org/). The following meta labels are available on targets during [relabeling](#relabel_config): * `__meta_nerve_path`: the full path to the endpoint node in Zookeeper * `__meta_nerve_endpoint_host`: the host of the endpoint * `__meta_nerve_endpoint_port`: the port of the endpoint * `__meta_nerve_endpoint_name`: the name of the endpoint ```yaml # The Zookeeper servers. servers: - # Paths can point to a single service, or the root of a tree of services. paths: - [ timeout: | default = 10s ] ``` ### `` Serverset SD configurations allow retrieving scrape targets from [Serversets] (https://github.com/twitter/finagle/tree/master/finagle-serversets) which are stored in [Zookeeper](https://zookeeper.apache.org/). Serversets are commonly used by [Finagle](https://twitter.github.io/finagle/) and [Aurora](https://aurora.apache.org/). The following meta labels are available on targets during relabeling: * `__meta_serverset_path`: the full path to the serverset member node in Zookeeper * `__meta_serverset_endpoint_host`: the host of the default endpoint * `__meta_serverset_endpoint_port`: the port of the default endpoint * `__meta_serverset_endpoint_host_`: the host of the given endpoint * `__meta_serverset_endpoint_port_`: the port of the given endpoint * `__meta_serverset_shard`: the shard number of the member * `__meta_serverset_status`: the status of the member ```yaml # The Zookeeper servers. servers: - # Paths can point to a single serverset, or the root of a tree of serversets. paths: - [ timeout: | default = 10s ] ``` Serverset data must be in the JSON format, the Thrift format is not currently supported. ### `` [Triton](https://github.com/joyent/triton) SD configurations allow retrieving scrape targets from [Container Monitor](https://github.com/joyent/rfd/blob/master/rfd/0027/README.md) discovery endpoints. 
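A minimal sketch of such a configuration (the account, DNS suffix and endpoint are placeholders for values from your own Triton installation; the full set of options is listed below):

```yaml
scrape_configs:
- job_name: 'triton-containers'
  triton_sd_configs:
  - account: 'my-account'
    dns_suffix: 'cmon.us-east-3b.triton.zone'
    endpoint: 'cmon.us-east-3b.triton.zone'
```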
The following meta labels are available on targets during relabeling: * `__meta_triton_groups`: the list of groups belonging to the target joined by a comma separator * `__meta_triton_machine_alias`: the alias of the target container * `__meta_triton_machine_brand`: the brand of the target container * `__meta_triton_machine_id`: the UUID of the target container * `__meta_triton_machine_image`: the target containers image type * `__meta_triton_server_id`: the server UUID for the target container ```yaml # The information to access the Triton discovery API. # The account to use for discovering new target containers. account: # The DNS suffix which should be applied to target containers. dns_suffix: # The Triton discovery endpoint (e.g. 'cmon.us-east-3b.triton.zone'). This is # often the same value as dns_suffix. endpoint: # A list of groups for which targets are retrieved. If omitted, all containers # available to the requesting account are scraped. groups: [ - ... ] # The port to use for discovery and metric scraping. [ port: | default = 9163 ] # The interval which should be used for refreshing target containers. [ refresh_interval: | default = 60s ] # The Triton discovery API version. [ version: | default = 1 ] # TLS configuration. tls_config: [ ] ``` ### `` A `static_config` allows specifying a list of targets and a common label set for them. It is the canonical way to specify static targets in a scrape configuration. ```yaml # The targets specified by the static config. targets: [ - '' ] # Labels assigned to all metrics scraped from the targets. labels: [ : ... ] ``` ### `` Relabeling is a powerful tool to dynamically rewrite the label set of a target before it gets scraped. Multiple relabeling steps can be configured per scrape configuration. They are applied to the label set of each target in order of their appearance in the configuration file. Initially, aside from the configured per-target labels, a target's `job` label is set to the `job_name` value of the respective scrape configuration. The `__address__` label is set to the `:` address of the target. After relabeling, the `instance` label is set to the value of `__address__` by default if it was not set during relabeling. The `__scheme__` and `__metrics_path__` labels are set to the scheme and metrics path of the target respectively. The `__param_` label is set to the value of the first passed URL parameter called ``. Additional labels prefixed with `__meta_` may be available during the relabeling phase. They are set by the service discovery mechanism that provided the target and vary between mechanisms. Labels starting with `__` will be removed from the label set after target relabeling is completed. If a relabeling step needs to store a label value only temporarily (as the input to a subsequent relabeling step), use the `__tmp` label name prefix. This prefix is guaranteed to never be used by Prometheus itself. ```yaml # The source labels select values from existing labels. Their content is concatenated # using the configured separator and matched against the configured regular expression # for the replace, keep, and drop actions. [ source_labels: '[' [, ...] ']' ] # Separator placed between concatenated source label values. [ separator: | default = ; ] # Label to which the resulting value is written in a replace action. # It is mandatory for replace actions. Regex capture groups are available. [ target_label: ] # Regular expression against which the extracted value is matched. 
[ regex: | default = (.*) ] # Modulus to take of the hash of the source label values. [ modulus: ] # Replacement value against which a regex replace is performed if the # regular expression matches. Regex capture groups are available. [ replacement: | default = $1 ] # Action to perform based on regex matching. [ action: | default = replace ] ``` `` is any valid [RE2 regular expression](https://github.com/google/re2/wiki/Syntax). It is required for the `replace`, `keep`, `drop`, `labelmap`,`labeldrop` and `labelkeep` actions. The regex is anchored on both ends. To un-anchor the regex, use `.*.*`. `` determines the relabeling action to take: * `replace`: Match `regex` against the concatenated `source_labels`. Then, set `target_label` to `replacement`, with match group references (`${1}`, `${2}`, ...) in `replacement` substituted by their value. If `regex` does not match, no replacement takes place. * `keep`: Drop targets for which `regex` does not match the concatenated `source_labels`. * `drop`: Drop targets for which `regex` matches the concatenated `source_labels`. * `hashmod`: Set `target_label` to the `modulus` of a hash of the concatenated `source_labels`. * `labelmap`: Match `regex` against all label names. Then copy the values of the matching labels to label names given by `replacement` with match group references (`${1}`, `${2}`, ...) in `replacement` substituted by their value. * `labeldrop`: Match `regex` against all label names. Any label that matches will be removed from the set of labels. * `labelkeep`: Match `regex` against all label names. Any label that does not match will be removed from the set of labels. Care must be taken with `labeldrop` and `labelkeep` to ensure that metrics are still uniquely labeled once the labels are removed. ### `` Metric relabeling is applied to samples as the last step before ingestion. It has the same configuration format and actions as target relabeling. Metric relabeling does not apply to automatically generated timeseries such as `up`. One use for this is to blacklist time series that are too expensive to ingest. ### `` Alert relabeling is applied to alerts before they are sent to the Alertmanager. It has the same configuration format and actions as target relabeling. Alert relabeling is applied after external labels. One use for this is ensuring a HA pair of Prometheus servers with different external labels send identical alerts. ### `` An `alertmanager_config` section specifies Alertmanager instances the Prometheus server sends alerts to. It also provides parameters to configure how to communicate with these Alertmanagers. Alertmanagers may be statically configured via the `static_configs` parameter or dynamically discovered using one of the supported service-discovery mechanisms. Additionally, `relabel_configs` allow selecting Alertmanagers from discovered entities and provide advanced modifications to the used API path, which is exposed through the `__alerts_path__` label. ```yaml # Per-target Alertmanager timeout when pushing alerts. [ timeout: | default = 10s ] # The api version of Alertmanager. [ api_version: | default = v1 ] # Prefix for the HTTP path alerts are pushed to. [ path_prefix: | default = / ] # Configures the protocol scheme used for requests. [ scheme: | default = http ] # Sets the `Authorization` header on every request with the # configured username and password. # password and password_file are mutually exclusive. 
basic_auth: [ username: ] [ password: ] [ password_file: ] # Sets the `Authorization` header on every request with # the configured bearer token. It is mutually exclusive with `bearer_token_file`. [ bearer_token: ] # Sets the `Authorization` header on every request with the bearer token # read from the configured file. It is mutually exclusive with `bearer_token`. [ bearer_token_file: /path/to/bearer/token/file ] # Configures the scrape request's TLS settings. tls_config: [ ] # Optional proxy URL. [ proxy_url: ] # List of Azure service discovery configurations. azure_sd_configs: [ - ... ] # List of Consul service discovery configurations. consul_sd_configs: [ - ... ] # List of DNS service discovery configurations. dns_sd_configs: [ - ... ] # List of EC2 service discovery configurations. ec2_sd_configs: [ - ... ] # List of file service discovery configurations. file_sd_configs: [ - ... ] # List of GCE service discovery configurations. gce_sd_configs: [ - ... ] # List of Kubernetes service discovery configurations. kubernetes_sd_configs: [ - ... ] # List of Marathon service discovery configurations. marathon_sd_configs: [ - ... ] # List of AirBnB's Nerve service discovery configurations. nerve_sd_configs: [ - ... ] # List of Zookeeper Serverset service discovery configurations. serverset_sd_configs: [ - ... ] # List of Triton service discovery configurations. triton_sd_configs: [ - ... ] # List of labeled statically configured Alertmanagers. static_configs: [ - ... ] # List of Alertmanager relabel configurations. relabel_configs: [ - ... ] ``` ### `` `write_relabel_configs` is relabeling applied to samples before sending them to the remote endpoint. Write relabeling is applied after external labels. This could be used to limit which samples are sent. There is a [small demo](/documentation/examples/remote_storage) of how to use this functionality. ```yaml # The URL of the endpoint to send samples to. url: # Timeout for requests to the remote write endpoint. [ remote_timeout: | default = 30s ] # List of remote write relabel configurations. write_relabel_configs: [ - ... ] # Sets the `Authorization` header on every remote write request with the # configured username and password. # password and password_file are mutually exclusive. basic_auth: [ username: ] [ password: ] [ password_file: ] # Sets the `Authorization` header on every remote write request with # the configured bearer token. It is mutually exclusive with `bearer_token_file`. [ bearer_token: ] # Sets the `Authorization` header on every remote write request with the bearer token # read from the configured file. It is mutually exclusive with `bearer_token`. [ bearer_token_file: /path/to/bearer/token/file ] # Configures the remote write request's TLS settings. tls_config: [ ] # Optional proxy URL. [ proxy_url: ] # Configures the queue used to write to remote storage. queue_config: # Number of samples to buffer per shard before we block reading of more # samples from the WAL. It is recommended to have enough capacity in each # shard to buffer several requests to keep throughput up while processing # occasional slow remote requests. [ capacity: | default = 500 ] # Maximum number of shards, i.e. amount of concurrency. [ max_shards: | default = 1000 ] # Minimum number of shards, i.e. amount of concurrency. [ min_shards: | default = 1 ] # Maximum number of samples per send. [ max_samples_per_send: | default = 100] # Maximum time a sample will wait in buffer. [ batch_send_deadline: | default = 5s ] # Initial retry delay. 
Gets doubled for every retry. [ min_backoff: | default = 30ms ] # Maximum retry delay. [ max_backoff: | default = 100ms ] ``` There is a list of [integrations](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage) with this feature. ### `` ```yaml # The URL of the endpoint to query from. url: # An optional list of equality matchers which have to be # present in a selector to query the remote read endpoint. required_matchers: [ : ... ] # Timeout for requests to the remote read endpoint. [ remote_timeout: | default = 1m ] # Whether reads should be made for queries for time ranges that # the local storage should have complete data for. [ read_recent: | default = false ] # Sets the `Authorization` header on every remote read request with the # configured username and password. # password and password_file are mutually exclusive. basic_auth: [ username: ] [ password: ] [ password_file: ] # Sets the `Authorization` header on every remote read request with # the configured bearer token. It is mutually exclusive with `bearer_token_file`. [ bearer_token: ] # Sets the `Authorization` header on every remote read request with the bearer token # read from the configured file. It is mutually exclusive with `bearer_token`. [ bearer_token_file: /path/to/bearer/token/file ] # Configures the remote read request's TLS settings. tls_config: [ ] # Optional proxy URL. [ proxy_url: ] ``` There is a list of [integrations](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage) with this feature. prometheus-2.15.2+ds/docs/configuration/index.md000066400000000000000000000000521360540074000216220ustar00rootroot00000000000000--- title: Configuration sort_rank: 3 --- prometheus-2.15.2+ds/docs/configuration/recording_rules.md000066400000000000000000000064731360540074000237160ustar00rootroot00000000000000--- title: Recording rules sort_rank: 2 --- # Defining recording rules ## Configuring rules Prometheus supports two types of rules which may be configured and then evaluated at regular intervals: recording rules and [alerting rules](alerting_rules.md). To include rules in Prometheus, create a file containing the necessary rule statements and have Prometheus load the file via the `rule_files` field in the [Prometheus configuration](configuration.md). Rule files use YAML. The rule files can be reloaded at runtime by sending `SIGHUP` to the Prometheus process. The changes are only applied if all rule files are well-formatted. ## Syntax-checking rules To quickly check whether a rule file is syntactically correct without starting a Prometheus server, install and run Prometheus's `promtool` command-line utility tool: ```bash go get github.com/prometheus/prometheus/cmd/promtool promtool check rules /path/to/example.rules.yml ``` When the file is syntactically valid, the checker prints a textual representation of the parsed rules to standard output and then exits with a `0` return status. If there are any syntax errors or invalid input arguments, it prints an error message to standard error and exits with a `1` return status. ## Recording rules Recording rules allow you to precompute frequently needed or computationally expensive expressions and save their result as a new set of time series. Querying the precomputed result will then often be much faster than executing the original expression every time it is needed. This is especially useful for dashboards, which need to query the same expression repeatedly every time they refresh. 
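For example (a sketch with hypothetical metric and rule names), instead of having every dashboard panel evaluate `sum(rate(api_http_requests_total[5m])) by (job)` against raw samples, the expression can be recorded once per evaluation cycle and dashboards can query the resulting series `job:api_http_requests:rate5m` instead:

```yaml
groups:
  - name: example_precompute        # hypothetical group name
    rules:
      # Evaluated every evaluation interval; the result is stored as a new time series.
      - record: job:api_http_requests:rate5m
        expr: sum(rate(api_http_requests_total[5m])) by (job)
```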
Recording and alerting rules exist in a rule group. Rules within a group are run sequentially at a regular interval. The syntax of a rule file is: ```yaml groups: [ - ] ``` A simple example rules file would be: ```yaml groups: - name: example rules: - record: job:http_inprogress_requests:sum expr: sum(http_inprogress_requests) by (job) ``` ### `` ``` # The name of the group. Must be unique within a file. name: # How often rules in the group are evaluated. [ interval: | default = global.evaluation_interval ] rules: [ - ... ] ``` ### `` The syntax for recording rules is: ``` # The name of the time series to output to. Must be a valid metric name. record: # The PromQL expression to evaluate. Every evaluation cycle this is # evaluated at the current time, and the result recorded as a new set of # time series with the metric name as given by 'record'. expr: # Labels to add or overwrite before storing the result. labels: [ : ] ``` The syntax for alerting rules is: ``` # The name of the alert. Must be a valid metric name. alert: # The PromQL expression to evaluate. Every evaluation cycle this is # evaluated at the current time, and all resultant time series become # pending/firing alerts. expr: # Alerts are considered firing once they have been returned for this long. # Alerts which have not yet fired for long enough are considered pending. [ for: | default = 0s ] # Labels to add or overwrite for each alert. labels: [ : ] # Annotations to add to each alert. annotations: [ : ] ``` prometheus-2.15.2+ds/docs/configuration/template_examples.md000066400000000000000000000070341360540074000242330ustar00rootroot00000000000000--- title: Template examples sort_rank: 4 --- # Template examples Prometheus supports templating in the annotations and labels of alerts, as well as in served console pages. Templates have the ability to run queries against the local database, iterate over data, use conditionals, format data, etc. The Prometheus templating language is based on the [Go templating](https://golang.org/pkg/text/template/) system. ## Simple alert field templates ``` alert: InstanceDown expr: up == 0 for: 5m labels: severity: page annotations: summary: "Instance {{$labels.instance}} down" description: "{{$labels.instance}} of job {{$labels.job}} has been down for more than 5 minutes." ``` Alert field templates will be executed during every rule iteration for each alert that fires, so keep any queries and templates lightweight. If you have a need for more complicated templates for alerts, it is recommended to link to a console instead. ## Simple iteration This displays a list of instances, and whether they are up: ```go {{ range query "up" }} {{ .Labels.instance }} {{ .Value }} {{ end }} ``` The special `.` variable contains the value of the current sample for each loop iteration. ## Display one value ```go {{ with query "some_metric{instance='someinstance'}" }} {{ . | first | value | humanize }} {{ end }} ``` Go and Go's templating language are both strongly typed, so one must check that samples were returned to avoid an execution error. For example this could happen if a scrape or rule evaluation has not run yet, or a host was down. The included `prom_query_drilldown` template handles this, allows for formatting of results, and linking to the [expression browser](https://prometheus.io/docs/visualization/browser/). ## Using console URL parameters ```go {{ with printf "node_memory_MemTotal{job='node',instance='%s'}" .Params.instance | query }} {{ . 
| first | value | humanize1024 }}B {{ end }}
```

If accessed as `console.html?instance=hostname`, `.Params.instance` will evaluate to `hostname`.

## Advanced iteration

```html
<table>
{{ range printf "node_network_receive_bytes{job='node',instance='%s',device!='lo'}" .Params.instance | query | sortByLabel "device" }}
<tr><th colspan=2>{{ .Labels.device }}</th></tr>
<tr>
  <td>Received</td>
  <td>{{ with printf "rate(node_network_receive_bytes{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device | query }}{{ . | first | value | humanize }}B/s{{ end }}</td>
</tr>
<tr>
  <td>Transmitted</td>
  <td>{{ with printf "rate(node_network_transmit_bytes{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device | query }}{{ . | first | value | humanize }}B/s{{ end }}</td>
</tr>
{{ end }}
</table>
``` Here we iterate over all network devices and display the network traffic for each. As the `range` action does not specify a variable, `.Params.instance` is not available inside the loop as `.` is now the loop variable. ## Defining reusable templates Prometheus supports defining templates that can be reused. This is particularly powerful when combined with [console library](template_reference.md#console-templates) support, allowing sharing of templates across consoles. ```go {{/* Define the template */}} {{define "myTemplate"}} do something {{end}} {{/* Use the template */}} {{template "myTemplate"}} ``` Templates are limited to one argument. The `args` function can be used to wrap multiple arguments. ```go {{define "myMultiArgTemplate"}} First argument: {{.arg0}} Second argument: {{.arg1}} {{end}} {{template "myMultiArgTemplate" (args 1 2)}} ``` prometheus-2.15.2+ds/docs/configuration/template_reference.md000066400000000000000000000137171360540074000243600ustar00rootroot00000000000000--- title: Template reference sort_rank: 5 --- # Template reference Prometheus supports templating in the annotations and labels of alerts, as well as in served console pages. Templates have the ability to run queries against the local database, iterate over data, use conditionals, format data, etc. The Prometheus templating language is based on the [Go templating](https://golang.org/pkg/text/template/) system. ## Data Structures The primary data structure for dealing with time series data is the sample, defined as: ```go type sample struct { Labels map[string]string Value float64 } ``` The metric name of the sample is encoded in a special `__name__` label in the `Labels` map. `[]sample` means a list of samples. `interface{}` in Go is similar to a void pointer in C. ## Functions In addition to the [default functions](https://golang.org/pkg/text/template/#hdr-Functions) provided by Go templating, Prometheus provides functions for easier processing of query results in templates. If functions are used in a pipeline, the pipeline value is passed as the last argument. ### Queries | Name | Arguments | Returns | Notes | | ------------- | ------------- | -------- | -------- | | query | query string | []sample | Queries the database, does not support returning range vectors. | | first | []sample | sample | Equivalent to `index a 0` | | label | label, sample | string | Equivalent to `index sample.Labels label` | | value | sample | float64 | Equivalent to `sample.Value` | | sortByLabel | label, []samples | []sample | Sorts the samples by the given label. Is stable. | `first`, `label` and `value` are intended to make query results easily usable in pipelines. ### Numbers | Name | Arguments | Returns | Notes | | ------------- | --------------| --------| --------- | | humanize | number | string | Converts a number to a more readable format, using [metric prefixes](https://en.wikipedia.org/wiki/Metric_prefix). | humanize1024 | number | string | Like `humanize`, but uses 1024 as the base rather than 1000. | | humanizeDuration | number | string | Converts a duration in seconds to a more readable format. | | humanizePercentage | number | string | Converts a ratio value to a fraction of 100. | | humanizeTimestamp | number | string | Converts a Unix timestamp in seconds to a more readable format. | Humanizing functions are intended to produce reasonable output for consumption by humans, and are not guaranteed to return the same results between Prometheus versions. 
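As a brief, hedged sketch of how these functions are typically used (assuming an alert annotation, where `$value` holds the alert's value):

```go
{{ $value | humanize }}B/s          {{/* adds an SI prefix such as k, M, or G */}}
{{ $value | humanize1024 }}B        {{/* same, but with base-1024 prefixes */}}
{{ $value | humanizePercentage }}   {{/* expects a ratio between 0 and 1 */}}
{{ $value | humanizeDuration }}     {{/* renders a number of seconds as a duration */}}
```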
### Strings | Name | Arguments | Returns | Notes | | ------------- | ------------- | ------- | ----------- | | title | string | string | [strings.Title](https://golang.org/pkg/strings/#Title), capitalises first character of each word.| | toUpper | string | string | [strings.ToUpper](https://golang.org/pkg/strings/#ToUpper), converts all characters to upper case.| | toLower | string | string | [strings.ToLower](https://golang.org/pkg/strings/#ToLower), converts all characters to lower case.| | match | pattern, text | boolean | [regexp.MatchString](https://golang.org/pkg/regexp/#MatchString) Tests for a unanchored regexp match. | | reReplaceAll | pattern, replacement, text | string | [Regexp.ReplaceAllString](https://golang.org/pkg/regexp/#Regexp.ReplaceAllString) Regexp substitution, unanchored. | | graphLink | expr | string | Returns path to graph view in the [expression browser](https://prometheus.io/docs/visualization/browser/) for the expression. | | tableLink | expr | string | Returns path to tabular ("Console") view in the [expression browser](https://prometheus.io/docs/visualization/browser/) for the expression. | ### Others | Name | Arguments | Returns | Notes | | ------------- | ------------- | ------- | ----------- | | args | []interface{} | map[string]interface{} | This converts a list of objects to a map with keys arg0, arg1 etc. This is intended to allow multiple arguments to be passed to templates. | | tmpl | string, []interface{} | nothing | Like the built-in `template`, but allows non-literals as the template name. Note that the result is assumed to be safe, and will not be auto-escaped. Only available in consoles. | | safeHtml | string | string | Marks string as HTML not requiring auto-escaping. | ## Template type differences Each of the types of templates provide different information that can be used to parameterize templates, and have a few other differences. ### Alert field templates `.Value`, `.Labels`, and `ExternalLabels` contain the alert value, the alert labels, and the globally configured external labels, respectively. They are also exposed as the `$value`, `$labels`, and `$externalLabels` variables for convenience. ### Console templates Consoles are exposed on `/consoles/`, and sourced from the directory pointed to by the `-web.console.templates` flag. Console templates are rendered with [html/template](https://golang.org/pkg/html/template/), which provides auto-escaping. To bypass the auto-escaping use the `safe*` functions., URL parameters are available as a map in `.Params`. To access multiple URL parameters by the same name, `.RawParams` is a map of the list values for each parameter. The URL path is available in `.Path`, excluding the `/consoles/` prefix. The globally configured external labels are available as `.ExternalLabels`. There are also convenience variables for all four: `$rawParams`, `$params`, `$path`, and `$externalLabels`. Consoles also have access to all the templates defined with `{{define "templateName"}}...{{end}}` found in `*.lib` files in the directory pointed to by the `-web.console.libraries` flag. As this is a shared namespace, take care to avoid clashes with other users. Template names beginning with `prom`, `_prom`, and `__` are reserved for use by Prometheus, as are the functions listed above. 
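To make these pieces concrete, here is a minimal, hypothetical sketch of a shared template in a `*.lib` file and a console page that uses it; the template name, file names, and query are illustrative only:

```go
{{/* In example.lib, loaded via -web.console.libraries; avoid the reserved prom, _prom and __ prefixes. */}}
{{ define "myOrg_instanceHeader" }}
  <h1>Instance {{ .arg0 }}</h1>
{{ end }}

{{/* In a console page under -web.console.templates, served e.g. as /consoles/example.html?instance=myhost */}}
{{ template "myOrg_instanceHeader" (args .Params.instance) }}
{{ with printf "up{instance='%s'}" .Params.instance | query }}
  Currently up: {{ . | first | value }}
{{ end }}
```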
prometheus-2.15.2+ds/docs/configuration/unit_testing_rules.md000066400000000000000000000141201360540074000244420ustar00rootroot00000000000000--- title: Unit Testing for Rules sort_rank: 6 --- # Unit Testing for Rules You can use `promtool` to test your rules. ```shell # For a single test file. ./promtool test rules test.yml # If you have multiple test files, say test1.yml,test2.yml,test2.yml ./promtool test rules test1.yml test2.yml test3.yml ``` ## Test file format ```yaml # This is a list of rule files to consider for testing. Globs are supported. rule_files: [ - ] # optional, default = 1m evaluation_interval: # The order in which group names are listed below will be the order of evaluation of # rule groups (at a given evaluation time). The order is guaranteed only for the groups mentioned below. # All the groups need not be mentioned below. group_eval_order: [ - ] # All the tests are listed here. tests: [ - ] ``` ### `` ``` yaml # Series data interval: input_series: [ - ] # Unit tests for the above data. # Unit tests for alerting rules. We consider the alerting rules from the input file. alert_rule_test: [ - ] # Unit tests for PromQL expressions. promql_expr_test: [ - ] # External labels accessible to the alert template. external_labels: [ : ... ] ``` ### `` ```yaml # This follows the usual series notation '{