pax_global_header00006660000000000000000000000064141722263330014515gustar00rootroot0000000000000052 comment=d5b3478363faac79b21c3a6c78f2cdc2f39fd6c8 grokky-0.1.0/000077500000000000000000000000001417222633300130215ustar00rootroot00000000000000grokky-0.1.0/.gitignore000066400000000000000000000004371417222633300150150ustar00rootroot00000000000000# Compiled Object files, Static and Dynamic libs (Shared Objects) *.o *.a *.so # Folders _obj _test # Architecture specific extensions/prefixes *.[568vq] [568vq].out *.cgo1.go *.cgo2.c _cgo_defun.c _cgo_gotypes.go _cgo_export.* _testmain.go *.exe *.test *.prof cover.out cover.htmlgrokky-0.1.0/.travis.yml000066400000000000000000000003311417222633300151270ustar00rootroot00000000000000language: go go: - tip before_install: - go get github.com/axw/gocov/gocov - go get github.com/mattn/goveralls - go get golang.org/x/tools/cmd/cover script: - $HOME/gopath/bin/goveralls -service=travis-ci grokky-0.1.0/LICENSE000066400000000000000000000007451417222633300140340ustar00rootroot00000000000000 DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE Version 2, December 2004 Copyright (C) 2004 Sam Hocevar Everyone is permitted to copy and distribute verbatim or modified copies of this license document, and changing it is allowed as long as the name is changed. DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. You just DO WHAT THE FUCK YOU WANT TO. 
grokky-0.1.0/README.md000066400000000000000000000152661417222633300143120ustar00rootroot00000000000000# grokky [![GoDoc](https://godoc.org/github.com/logrusorgru/grokky?status.svg)](https://godoc.org/github.com/logrusorgru/grokky) [![WTFPL License](https://img.shields.io/badge/license-wtfpl-blue.svg)](http://www.wtfpl.net/about/) [![Build Status](https://travis-ci.org/logrusorgru/grokky.svg)](https://travis-ci.org/logrusorgru/grokky) [![Coverage Status](https://coveralls.io/repos/logrusorgru/grokky/badge.svg?branch=master)](https://coveralls.io/r/logrusorgru/grokky?branch=master) [![GoReportCard](https://goreportcard.com/badge/logrusorgru/grokky)](https://goreportcard.com/report/logrusorgru/grokky) [![Gitter](https://img.shields.io/badge/chat-on_gitter-46bc99.svg?logo=data:image%2Fsvg%2Bxml%3Bbase64%2CPHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIGhlaWdodD0iMTQiIHdpZHRoPSIxNCI%2BPGcgZmlsbD0iI2ZmZiI%2BPHJlY3QgeD0iMCIgeT0iMyIgd2lkdGg9IjEiIGhlaWdodD0iNSIvPjxyZWN0IHg9IjIiIHk9IjQiIHdpZHRoPSIxIiBoZWlnaHQ9IjciLz48cmVjdCB4PSI0IiB5PSI0IiB3aWR0aD0iMSIgaGVpZ2h0PSI3Ii8%2BPHJlY3QgeD0iNiIgeT0iNCIgd2lkdGg9IjEiIGhlaWdodD0iNCIvPjwvZz48L3N2Zz4%3D&logoWidth=10)](https://gitter.im/logrusorgru/grokky?utm_source=share-link&utm_medium=link&utm_campaign=share-link) Package grokky is a pure Golang Grok-like patterns library that can help you parse log files and other text. It is based on the [RE2](https://en.wikipedia.org/wiki/RE2_%28software%29) regexp engine, which is [much faster](https://swtch.com/~rsc/regexp/regexp1.html) than [Oniguruma](https://en.wikipedia.org/wiki/Oniguruma) in some cases. Check out the "much faster" article to understand the difference. The library was designed for creating many patterns and using them many times. The behavior and capabilities are slightly different from the original library. The goals of the library are: 1. simplicity, 2. speed, 3. ease of use. 
# Also See also another Golang implementation, [vjeantet/grok](https://github.com/vjeantet/grok), that is closer to the [original](https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html) library. The differences: 1. Grokky allows named captures only. The name of a pattern is just the name of a pattern and nothing more; you can treat it as an alias for a regexp. It's impossible to use the name of a pattern as a capture group. In some cases grokky is similar to a grok created as `g, err := grok.NewWithConfig(&grok.Config{NamedCapturesOnly: true})`. 2. Grokky prefers the top-level named group. If you have two patterns, and the second pattern has a named group with the same name and is nested into the first, then the named group of the first pattern is used. The grok uses the last (closer to the tail) group in all cases. But the grok also has a `ParseToMultiMap` method. To see the difference explained, get the package (using `go get -t`) and run the following command: `go test -v -run the_difference github.com/logrusorgru/grokky`. Or check out the [source code of the test](https://github.com/logrusorgru/grokky/blob/master/bench_test.go#L134). 3. Grokky was designed as a factory of patterns, i.e. compile once and use many times. 
# Get it ``` go get -u -t github.com/logrusorgru/grokky ``` Run the test cases ``` go test github.com/logrusorgru/grokky ``` Run the benchmark comparison with vjeantet/grok ``` go test -bench=.* github.com/logrusorgru/grokky ``` # Example ```go package main import ( "github.com/logrusorgru/grokky" "fmt" "log" "time" ) func createHost() grokky.Host { h := grokky.New() // add patterns to the Host h.Must("YEAR", `(?:\d\d){1,2}`) h.Must("MONTHNUM2", `0[1-9]|1[0-2]`) h.Must("MONTHDAY", `(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9]`) h.Must("HOUR", `2[0123]|[01]?[0-9]`) h.Must("MINUTE", `[0-5][0-9]`) h.Must("SECOND", `(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?`) h.Must("TIMEZONE", `Z%{HOUR}:%{MINUTE}`) h.Must("DATE", "%{YEAR:year}-%{MONTHNUM2:month}-%{MONTHDAY:day}") h.Must("TIME", "%{HOUR:hour}:%{MINUTE:min}:%{SECOND:sec}") return h } func main() { h := createHost() // compile the pattern for RFC3339 time p, err := h.Compile("%{DATE:date}T%{TIME:time}%{TIMEZONE:tz}") if err != nil { log.Fatal(err) } for k, v := range p.Parse(time.Now().Format(time.RFC3339)) { fmt.Printf("%s: %v\n", k, v) } // // Yes, it's better to use time.Parse for time values // but this is just example. // } ``` # Performance note Don't complicate regular expressions. Use the simplest regular expressions possible. 
Here is an example for the Nginx access log, combined format: ```go h := New() h.Must("NSS", `[^\s]*`) // not a space * h.Must("NS", `[^\s]+`) // not a space + h.Must("NLB", `[^\]]+`) // not a right bracket + h.Must("NQS", `[^"]*`) // not a double quote * h.Must("NQ", `[^"]+`) // not a double quote + h.Must("nginx", `%{NS:remote_addr}\s\-\s`+ `%{NSS:remote_user}\s*\-\s\[`+ `%{NLB:time_local}\]\s\"`+ `%{NQ:request}\"\s`+ `%{NS:status}\s`+ `%{NS:body_bytes_sent}\s\"`+ `%{NQ:http_referer}\"\s\"`+ `%{NQ:user_agent}\"`) nginx, err := h.Get("nginx") if err != nil { panic(err) } for logLine := range catLogFileLineByLineChannel { values := nginx.Parse(logLine) // stuff } ``` or there is another version (thanks to __@nanjj__) ```go h := New() h.Must("NSS", `[^\s]*`) // not a space * h.Must("NS", `[^\s]+`) // not a space + h.Must("NLB", `[^\]]+`) // not a right bracket + h.Must("NQS", `[^"]*`) // not a double quote * h.Must("NQ", `[^"]+`) // not a double quote + h.Must("A", `.*`) // all (get tail) h.Must("nginx", `%{NS:clientip}\s%{NSS:ident}\s%{NSS:auth}`+ `\s\[`+ `%{NLB:timestamp}\]\s\"`+ `%{NS:verb}\s`+ `%{NSS:request}\s`+ `HTTP/%{NS:httpversion}\"\s`+ `%{NS:response}\s`+ `%{NS:bytes}\s\"`+ `%{NQ:referrer}\"\s\"`+ `%{NQ:agent}\"`+ `%{A:blob}`) // [...] ``` ## More performance Since the [`grokky.Pattern`](https://godoc.org/github.com/logrusorgru/grokky#Pattern) inherits [`regexp.Regexp`](https://godoc.org/regexp#Regexp), it's possible to use the methods of `regexp.Regexp`. E.g. you can use [`FindStringSubmatch`](https://godoc.org/regexp#Regexp.FindStringSubmatch) instead of `(grokky.Pattern).Parse`, or any other method of `regexp.Regexp`. Check out [Benchmark_parse_vs_findStringSubmatch](https://github.com/logrusorgru/grokky/blob/master/bench_test.go#L409) for an example. 
For my machine result of this becnhmark is (the map is `Parse`, and the slice is `FindStringSubmatch`) ``` map-4 200000 9980 ns/op 1370 B/op 5 allocs/op slice-4 200000 7508 ns/op 416 B/op 2 allocs/op ``` # Licensing Copyright © 2016-2018 Konstantin Ivanov This work is free. It comes without any warranty, to the extent permitted by applicable law. You can redistribute it and/or modify it under the terms of the Do What The Fuck You Want To Public License, Version 2, as published by Sam Hocevar. See the LICENSE file for more details. grokky-0.1.0/base.go000066400000000000000000000212551417222633300142670ustar00rootroot00000000000000// // Copyright (c) 2016-2017 Konstanin Ivanov . // All rights reserved. This program is free software. It comes without // any warranty, to the extent permitted by applicable law. You can // redistribute it and/or modify it under the terms of the Do What // The Fuck You Want To Public License, Version 2, as published by // Sam Hocevar. See LICENSE file for more details or see below. // // // DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE // Version 2, December 2004 // // Copyright (C) 2004 Sam Hocevar // // Everyone is permitted to copy and distribute verbatim or modified // copies of this license document, and changing it is allowed as long // as the name is changed. // // DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE // TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION // // 0. You just DO WHAT THE FUCK YOU WANT TO. // package grokky func must(err error) { if err != nil { panic(err) } } // Must is like Add but panics if the expression can't be parsed or // the name is empty. func (h Host) Must(name, expr string) { must(h.Add(name, expr)) } // NewBase creates new Host that filled up with base patterns. // To see all base patterns open 'base.go' file. 
func NewBase() Host { h := make(Host) // h.Must("USERNAME", `[a-zA-Z0-9._-]+`) h.Must("USER", `%{USERNAME}`) h.Must("EMAILLOCALPART", `[a-zA-Z][a-zA-Z0-9_.+-=:]+`) h.Must("HOSTNAME", `\b[0-9A-Za-z][0-9A-Za-z-]{0,62}(?:\.[0-9A-Za-z][0-9A-Za-z-]{0,62})*(\.?|\b)`) h.Must("EMAILADDRESS", `%{EMAILLOCALPART}@%{HOSTNAME}`) h.Must("HTTPDUSER", `%{EMAILADDRESS}|%{USER}`) h.Must("INT", `[+-]?(?:[0-9]+)`) h.Must("BASE10NUM", `[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+))`) h.Must("NUMBER", `%{BASE10NUM}`) h.Must("BASE16NUM", `[+-]?(?:0x)?(?:[0-9A-Fa-f]+)`) h.Must("BASE16FLOAT", `\b[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+))\b`) // h.Must("POSINT", `\b[1-9][0-9]*\b`) h.Must("NONNEGINT", `\b[0-9]+\b`) h.Must("WORD", `\b\w+\b`) h.Must("NOTSPACE", `\S+`) h.Must("SPACE", `\s*`) h.Must("DATA", `.*?`) h.Must("GREEDYDATA", `.*`) h.Must("QUOTEDSTRING", `("(\\.|[^\\"]+)+")|""|('(\\.|[^\\']+)+')|''|`+ "(`(\\\\.|[^\\\\`]+)+`)|``") h.Must("UUID", `[A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}`) // Networking h.Must("CISCOMAC", `(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4}`) h.Must("WINDOWSMAC", `(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}`) h.Must("COMMONMAC", `(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2}`) h.Must("MAC", `%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC}`) h.Must("IPV6", 
`((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?`) h.Must("IPV4", `(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)`) h.Must("IP", `%{IPV6}|%{IPV4}`) h.Must("IPORHOST", `%{IP}|%{HOSTNAME}`) h.Must("HOSTPORT", `%{IPORHOST}:%{POSINT}`) // paths h.Must("UNIXPATH", `(/([\w_%!$@:.,~-]+|\\.)*)+`) h.Must("TTY", `/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+)`) h.Must("WINPATH", `(?:[A-Za-z]+:|\\)(?:\\[^\\?*]*)+`) h.Must("PATH", `%{UNIXPATH}|%{WINPATH}`) h.Must("URIPROTO", `[A-Za-z]+(\+[A-Za-z+]+)?`) h.Must("URIHOST", `%{IPORHOST}(?::%{POSINT:port})?`) // uripath comes loosely from RFC1738, but mostly from what Firefox // doesn't turn into %XX h.Must("URIPATH", `(?:/[A-Za-z0-9$.+!*'(){},~:;=@#%_\-]*)+`) h.Must("URIPARAM", `\?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]*`) h.Must("URIPATHPARAM", `%{URIPATH}(?:%{URIPARAM})?`) h.Must("URI", `%{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})?`) // Months: January, Feb, 
3, 03, 12, December h.Must("MONTH", `\bJan(?:uary|uar)?|Feb(?:ruary|ruar)?|M(?:a|ä)?r(?:ch|z)?|Apr(?:il)?|Ma(?:y|i)?|Jun(?:e|i)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|O(?:c|k)?t(?:ober)?|Nov(?:ember)?|De(?:c|z)(?:ember)?\b`) h.Must("MONTHNUM", `0?[1-9]|1[0-2]`) h.Must("MONTHNUM2", `0[1-9]|1[0-2]`) h.Must("MONTHDAY", `(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9]`) // Days: Monday, Tue, Thu, etc... h.Must("DAY", `Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?`) // Years? h.Must("YEAR", `(?:\d\d){1,2}`) h.Must("HOUR", `2[0123]|[01]?[0-9]`) h.Must("MINUTE", `[0-5][0-9]`) // '60' is a leap second in most time standards and thus is valid. h.Must("SECOND", `(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?`) h.Must("TIME", `%{HOUR}:%{MINUTE}:%{SECOND}`) // datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it) h.Must("DATE_US", `%{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR}`) h.Must("DATE_EU", `%{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR}`) // I really don't know how it's called h.Must("DATE_X", `%{YEAR}/%{MONTHNUM2}/%{MONTHDAY}`) h.Must("ISO8601_TIMEZONE", `Z|[+-]%{HOUR}(?::?%{MINUTE})`) h.Must("ISO8601_SECOND", `%{SECOND}|60`) h.Must("TIMESTAMP_ISO8601", `%{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?`) h.Must("DATE", `%{DATE_US}|%{DATE_EU}|%{DATE_X}`) h.Must("DATESTAMP", `%{DATE}[- ]%{TIME}`) h.Must("TZ", `[A-Z]{3}`) h.Must("NUMTZ", `[+-]\d{4}`) h.Must("DATESTAMP_RFC822", `%{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}`) h.Must("DATESTAMP_RFC2822", `%{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE}`) h.Must("DATESTAMP_OTHER", `%{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR}`) h.Must("DATESTAMP_EVENTLOG", `%{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND}`) h.Must("HTTPDERROR_DATE", `%{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{YEAR}`) // golang time patterns h.Must("ANSIC", `%{DAY} %{MONTH} [_123]\d %{TIME} %{YEAR}"`) h.Must("UNIXDATE", `%{DAY} %{MONTH} [_123]\d %{TIME} %{TZ} %{YEAR}`) 
h.Must("RUBYDATE", `%{DAY} %{MONTH} [0-3]\d %{TIME} %{NUMTZ} %{YEAR}`) h.Must("RFC822Z", `[0-3]\d %{MONTH} %{YEAR} %{TIME} %{NUMTZ}`) h.Must("RFC850", `%{DAY}, [0-3]\d-%{MONTH}-%{YEAR} %{TIME} %{TZ}`) h.Must("RFC1123", `%{DAY}, [0-3]\d %{MONTH} %{YEAR} %{TIME} %{TZ}`) h.Must("RFC1123Z", `%{DAY}, [0-3]\d %{MONTH} %{YEAR} %{TIME} %{NUMTZ}`) h.Must("RFC3339", `%{YEAR}-[01]\d-[0-3]\dT%{TIME}%{ISO8601_TIMEZONE}`) h.Must("RFC3339NANO", `%{YEAR}-[01]\d-[0-3]\dT%{TIME}\.\d{9}%{ISO8601_TIMEZONE}`) h.Must("KITCHEN", `\d{1,2}:\d{2}(AM|PM|am|pm)`) // Syslog Dates: Month Day HH:MM:SS h.Must("SYSLOGTIMESTAMP", `%{MONTH} +%{MONTHDAY} %{TIME}`) h.Must("PROG", `[\x21-\x5a\x5c\x5e-\x7e]+`) h.Must("SYSLOGPROG", `%{PROG:program}(?:\[%{POSINT:pid}\])?`) h.Must("SYSLOGHOST", `%{IPORHOST}`) h.Must("SYSLOGFACILITY", `<%{NONNEGINT:facility}.%{NONNEGINT:priority}>`) h.Must("HTTPDATE", `%{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT}`) // Shortcuts h.Must("QS", `%{QUOTEDSTRING}`) // Log Levels h.Must("LOGLEVEL", `[Aa]lert|ALERT|[Tt]race|TRACE|[Dd]ebug|DEBUG|[Nn]otice|NOTICE|[Ii]nfo|INFO|[Ww]arn?(?:ing)?|WARN?(?:ING)?|[Ee]rr?(?:or)?|ERR?(?:OR)?|[Cc]rit?(?:ical)?|CRIT?(?:ICAL)?|[Ff]atal|FATAL|[Ss]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?`) // Log formats h.Must("SYSLOGBASE", `%{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}:`) h.Must("COMMONAPACHELOG", `%{IPORHOST:clientip} %{HTTPDUSER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" %{NUMBER:response} (?:%{NUMBER:bytes}|-)`) h.Must("COMBINEDAPACHELOG", `%{COMMONAPACHELOG} %{QS:referrer} %{QS:agent}`) h.Must("HTTPD20_ERRORLOG", `\[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg}`) h.Must("HTTPD24_ERRORLOG", `\[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel}\] \[pid %{POSINT:pid}(:tid %{NUMBER:tid})?\]( 
\(%{POSINT:proxy_errorcode}\)%{DATA:proxy_errormessage}:)?( \[client %{IPORHOST:client}:%{POSINT:clientport}\])? %{DATA:errorcode}: %{GREEDYDATA:message}`) h.Must("HTTPD_ERRORLOG", `%{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG}`) return h } grokky-0.1.0/base_test.go000066400000000000000000000262431417222633300153300ustar00rootroot00000000000000// // Copyright (c) 2016-2017 Konstanin Ivanov . // All rights reserved. This program is free software. It comes without // any warranty, to the extent permitted by applicable law. You can // redistribute it and/or modify it under the terms of the Do What // The Fuck You Want To Public License, Version 2, as published by // Sam Hocevar. See LICENSE file for more details or see below. // // // DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE // Version 2, December 2004 // // Copyright (C) 2004 Sam Hocevar // // Everyone is permitted to copy and distribute verbatim or modified // copies of this license document, and changing it is allowed as long // as the name is changed. // // DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE // TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION // // 0. You just DO WHAT THE FUCK YOU WANT TO. 
// package grokky import ( "testing" ) func TestHost_Must(t *testing.T) { defer func() { if r := recover(); r == nil { t.Error("missing panic") } }() h := New() h.Must("", "") } func TestNewBase(t *testing.T) { defer func() { if r := recover(); r != nil { t.Fatalf("NewBase panics: %v", r) } }() h := NewBase() _ = h } // // TODO/REDO ah-ha-ha // /* func testPattern(t *testing.T, name string, input string, expect map[string]string) { h := NewBase() p, err := h.Get(name) if err != nil { t.Error(err) return } if !mssTest(expect, p.Parse(input)) { t.Errorf("pattern [%s] missmatches\n%v\n%v\n%v", name, p.Parse(input), expect, p) } } func Test_basePatterns_base(t *testing.T) { testPattern(t, "USERNAME", "jho.blow-motherfucker666", map[string]string{ "USERNAME": "jho.blow-motherfucker666", }) testPattern(t, "USER", "jho.blow-motherfucker666", map[string]string{ "USERNAME": "jho.blow-motherfucker666", "USER": "jho.blow-motherfucker666", }) // TODO // --testPattern(t, "EMAILLOCALPART", `[a-zA-Z][a-zA-Z0-9_.+-=:]+`) // --testPattern(t, "HOSTNAME", `\b[0-9A-Za-z][0-9A-Za-z-]{0,62}(?:\.[0-9A-Za-z][0-9A-Za-z-]{0,62})*(\.?|\b)`) // --testPattern(t, "EMAILADDRESS", `%{EMAILLOCALPART}@%{HOSTNAME}`) // --testPattern(t, "HTTPDUSER", `%{EMAILADDRESS}|%{USER}`) // --testPattern(t, "INT", `[+-]?(?:[0-9]+)`) // --testPattern(t, "BASE10NUM", `[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+))`) // --testPattern(t, "NUMBER", `%{BASE10NUM}`) // --testPattern(t, "BASE16NUM", `[+-]?(?:0x)?(?:[0-9A-Fa-f]+)`) // --testPattern(t, "BASE16FLOAT", `\b[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+))\b`) } func Test_basePatterns_wordsNumbers(t *testing.T) { // TODO testPattern(t, "POSINT", "19", map[string]string{ "POSINT": "19", }) testPattern(t, "NONNEGINT", "0", map[string]string{ "NONNEGINT": "0", }) testPattern(t, "WORD", "word", map[string]string{ "WORD": "word", }) // --testPattern(t, "NOTSPACE", `\S+`) // --testPattern(t, "SPACE", `\s*`) // --testPattern(t, "DATA", `.*?`) // 
--testPattern(t, "GREEDYDATA", `.*`) // --testPattern(t, "QUOTEDSTRING", `("(\\.|[^\\"]+)+")|""|('(\\.|[^\\']+)+')|''|`+"(`(\\\\.|[^\\\\`]+)+`)|``") // --testPattern(t, "UUID", `[A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}`) } func Test_basePatterns_networking(t *testing.T) { // TODO // Networking // --testPattern(t, "CISCOMAC", `(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4}`) // --testPattern(t, "WINDOWSMAC", `(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}`) // --testPattern(t, "COMMONMAC", `(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2}`) // --testPattern(t, "MAC", `%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC}`) // --testPattern(t, "IPV6", `((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?`) // --testPattern(t, "IPV4", `(?:(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5]))`) // --testPattern(t, "IP", `%{IPV6}|%{IPV4}`) // --testPattern(t, "IPORHOST", 
`%{IP}|%{HOSTNAME}`) // --testPattern(t, "HOSTPORT", `%{IPORHOST}:%{POSINT}`) } func Test_basePatterns_paths(t *testing.T) { // TODO // paths // --testPattern(t, "UNIXPATH", `(/([\w_%!$@:.,~-]+|\\.)*)+`) // --testPattern(t, "TTY", `/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+)`) // --testPattern(t, "WINPATH", `(?:[A-Za-z]+:|\\)(?:\\[^\\?*]*)+`) // --testPattern(t, "PATH", `%{UNIXPATH}|%{WINPATH}`) // --testPattern(t, "URIPROTO", `[A-Za-z]+(\+[A-Za-z+]+)?`) // --testPattern(t, "URIHOST", `%{IPORHOST}(?::%{POSINT:port})?`) } func Test_basePatterns_uri(t *testing.T) { // TODO // uripath comes loosely from RFC1738, but mostly from what Firefox // doesn't turn into %XX // --testPattern(t, "URIPATH", `(?:/[A-Za-z0-9$.+!*'(){},~:;=@#%_\-]*)+`) // --testPattern(t, "URIPARAM", `\?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]*`) // --testPattern(t, "URIPATHPARAM", `%{URIPATH}(?:%{URIPARAM})?`) // --testPattern(t, "URI", `%{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})?`) } func Test_basePatterns_date(t *testing.T) { // TODO // Months: January, Feb, 3, 03, 12, December // --testPattern(t, "MONTH", `\bJan(?:uary|uar)?|Feb(?:ruary|ruar)?|M(?:a|ä)?r(?:ch|z)?|Apr(?:il)?|Ma(?:y|i)?|Jun(?:e|i)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|O(?:c|k)?t(?:ober)?|Nov(?:ember)?|De(?:c|z)(?:ember)?\b`) // --testPattern(t, "MONTHNUM", `0?[1-9]|1[0-2]`) testPattern(t, "MONTHNUM2", "11", map[string]string{ "MONTHNUM2": "11", }) // --testPattern(t, "MONTHDAY", `(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9]`) // Days: Monday, Tue, Thu, etc... // --testPattern(t, "DAY", `Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?`) } func Test_basePatterns_time(t *testing.T) { // Years? // --testPattern(t, "YEAR", `(?:\d\d){1,2}`) // --testPattern(t, "HOUR", `2[0123]|[01]?[0-9]`) // --testPattern(t, "MINUTE", `[0-5][0-9]`) // '60' is a leap second in most time standards and thus is valid. 
// --testPattern(t, "SECOND", `(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?`) // --testPattern(t, "TIME", `%{HOUR}:%{MINUTE}:%{SECOND}`) } func Test_basePatterns_timestamps(t *testing.T) { // datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it) // --testPattern(t, "DATE_US", `%{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR}`) // --testPattern(t, "DATE_EU", `%{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR}`) // I really don't know how it's called // --testPattern(t, "DATE_X", `%{YEAR}/%{MONTHNUM2}/%{MONTHDAY}`) // --testPattern(t, "ISO8601_TIMEZONE", `Z|[+-]%{HOUR}(?::?%{MINUTE})`) // --testPattern(t, "ISO8601_SECOND", `%{SECOND}|60`) // --testPattern(t, "TIMESTAMP_ISO8601", `%{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?`) // --testPattern(t, "DATE", `%{DATE_US}|%{DATE_EU}|%{DATE_X}`) // --testPattern(t, "DATESTAMP", `%{DATE}[- ]%{TIME}`) // --testPattern(t, "TZ", `[A-Z]{3}`) // --testPattern(t, "NUMTZ", `[+-]\d{4}`) // --testPattern(t, "DATESTAMP_RFC822", `%{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}`) // --testPattern(t, "DATESTAMP_RFC2822", `%{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE}`) // --testPattern(t, "DATESTAMP_OTHER", `%{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR}`) // --testPattern(t, "DATESTAMP_EVENTLOG", `%{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND}`) // --testPattern(t, "HTTPDERROR_DATE", `%{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{YEAR}`) } func Test_basePatterns_golangTime(t *testing.T) { // golang time patterns // --testPattern(t, "ANSIC", `%{DAY} %{MONTH} [_123]\d %{TIME} %{YEAR}"`) // --testPattern(t, "UNIXDATE", `%{DAY} %{MONTH} [_123]\d %{TIME} %{TZ} %{YEAR}`) // --testPattern(t, "RUBYDATE", `%{DAY} %{MONTH} [0-3]\d %{TIME} %{NUMTZ} %{YEAR}`) // --testPattern(t, "RFC822Z", `[0-3]\d %{MONTH} %{YEAR} %{TIME} %{NUMTZ}`) // --testPattern(t, "RFC850", `%{DAY}, [0-3]\d-%{MONTH}-%{YEAR} %{TIME} %{TZ}`) // --testPattern(t, "RFC1123", `%{DAY}, [0-3]\d %{MONTH} %{YEAR} %{TIME} %{TZ}`) // 
--testPattern(t, "RFC1123Z", `%{DAY}, [0-3]\d %{MONTH} %{YEAR} %{TIME} %{NUMTZ}`) // --testPattern(t, "RFC3339", `%{YEAR}-[01]\d-[0-3]\dT%{TIME}%{ISO8601_TIMEZONE}`) // --testPattern(t, "RFC3339NANO", `%{YEAR}-[01]\d-[0-3]\dT%{TIME}\.\d{9}%{ISO8601_TIMEZONE}`) // --testPattern(t, "KITCHEN", `\d{1,2}:\d{2}(AM|PM|am|pm)`) } func Test_basePatterns_syslogDates(t *testing.T) { // Syslog Dates: Month Day HH:MM:SS // --testPattern(t, "SYSLOGTIMESTAMP", `%{MONTH} +%{MONTHDAY} %{TIME}`) // --testPattern(t, "PROG", `[\x21-\x5a\x5c\x5e-\x7e]+`) // --testPattern(t, "SYSLOGPROG", `%{PROG:program}(?:\[%{POSINT:pid}\])?`) // --testPattern(t, "SYSLOGHOST", `%{IPORHOST}`) // --testPattern(t, "SYSLOGFACILITY", `<%{NONNEGINT:facility}.%{NONNEGINT:priority}>`) // --testPattern(t, "HTTPDATE", `%{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT}`) } func Test_basePatterns_shortcuts(t *testing.T) { // Shortcuts // --testPattern(t, "QS", `%{QUOTEDSTRING}`) } func Test_basePatterns_logLevels(t *testing.T) { // Log Levels // --testPattern(t, "LOGLEVEL", `[Aa]lert|ALERT|[Tt]race|TRACE|[Dd]ebug|DEBUG|[Nn]otice|NOTICE|[Ii]nfo|INFO|[Ww]arn?(?:ing)?|WARN?(?:ING)?|[Ee]rr?(?:or)?|ERR?(?:OR)?|[Cc]rit?(?:ical)?|CRIT?(?:ICAL)?|[Ff]atal|FATAL|[Ss]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?`) } func Test_basePatterns_logFormats(t *testing.T) { // Log formats // --testPattern(t, "SYSLOGBASE", `%{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}:`) // --testPattern(t, "COMMONAPACHELOG", `%{IPORHOST:clientip} %{HTTPDUSER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" %{NUMBER:response} (?:%{NUMBER:bytes}|-)`) // --testPattern(t, "COMBINEDAPACHELOG", `%{COMMONAPACHELOG} %{QS:referrer} %{QS:agent}`) // --testPattern(t, "HTTPD20_ERRORLOG", `\[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg}`) // --testPattern(t, 
"HTTPD24_ERRORLOG", `\[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel}\] \[pid %{POSINT:pid}:tid %{NUMBER:tid}\]( \(%{POSINT:proxy_errorcode}\)%{DATA:proxy_errormessage}:)?( \[client %{IPORHOST:client}:%{POSINT:clientport}\])? %{DATA:errorcode}: %{GREEDYDATA:message}`) // --testPattern(t, "HTTPD_ERRORLOG", `%{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG}`) } */ grokky-0.1.0/bench_test.go000066400000000000000000000443371417222633300155010ustar00rootroot00000000000000// // Copyright (c) 2016-2017 Konstanin Ivanov . // All rights reserved. This program is free software. It comes without // any warranty, to the extent permitted by applicable law. You can // redistribute it and/or modify it under the terms of the Do What // The Fuck You Want To Public License, Version 2, as published by // Sam Hocevar. See LICENSE file for more details or see below. // // // DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE // Version 2, December 2004 // // Copyright (C) 2004 Sam Hocevar // // Everyone is permitted to copy and distribute verbatim or modified // copies of this license document, and changing it is allowed as long // as the name is changed. // // DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE // TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION // // 0. You just DO WHAT THE FUCK YOU WANT TO. // package grokky import ( "testing" "github.com/vjeantet/grok" ) // Intel Core i5-6200U // DDR4 8G 2133 MHz // Linux Ubuntu 17.04 4.12.4-041204-generic // // go test -bench . 
-benchtime=1m // // Benchmark_logrusorgru_grokky_rfc3339-4 30000000 3201 ns/op 1297 B/op 5 allocs/op // Benchmark_vjeantet_grok_rfc3339-4 30000000 2967 ns/op 1329 B/op 5 allocs/op // Benchmark_grokkyVsGrokApacheLog/grokky-4 200000 447480 ns/op 5098 B/op 6 allocs/op // Benchmark_grokkyVsGrokApacheLog/grok-4 200000 473763 ns/op 5609 B/op 6 allocs/op // PASS // ok github.com/logrusorgru/grokky 385.076s // RFC3339 = "2006-01-02T15:04:05Z07:00" const rfc3339Testee = "2006-01-02T15:04:05Z07:00" var ( globalMap map[string]string globalString string ) // find: // tz, date, year, month, day, time, hour, min, sec func Benchmark_logrusorgru_grokky_rfc3339(b *testing.B) { b.StopTimer() h := New() // from base patterns h.Must("YEAR", `(?:\d\d){1,2}`) h.Must("MONTHNUM2", `0[1-9]|1[0-2]`) h.Must("MONTHDAY", `(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9]`) h.Must("HOUR", `2[0123]|[01]?[0-9]`) h.Must("MINUTE", `[0-5][0-9]`) h.Must("SECOND", `(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?`) h.Must("TIMEZONE", `Z%{HOUR}:%{MINUTE}`) // not from base h.Must("DATE", "%{YEAR:year}-%{MONTHNUM2:month}-%{MONTHDAY:day}") h.Must("TIME", "%{HOUR:hour}:%{MINUTE:min}:%{SECOND:sec}") // compile the pattern p, err := h.Compile("%{DATE:date}T%{TIME:time}%{TIMEZONE:tz}") if err != nil { b.Fatal(err) } b.StartTimer() for i := 0; i < b.N; i++ { mss := p.Parse(rfc3339Testee) globalMap = mss } b.ReportAllocs() } func berr(b *testing.B, err error) { if err != nil { b.Fatal(err) } } // find: // tz, date, year, month, day, time, hour, min, sec func Benchmark_vjeantet_grok_rfc3339(b *testing.B) { b.StopTimer() h, err := grok.NewWithConfig(&grok.Config{ SkipDefaultPatterns: true, NamedCapturesOnly: true, }) if err != nil { b.Skip("error creating vjeantet/grok:", err) } for _, np := range []struct{ name, pattern string }{ {"YEAR", `(?:\d\d){1,2}`}, {"MONTHNUM2", `0[1-9]|1[0-2]`}, {"MONTHDAY", `(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9]`}, {"HOUR", `2[0123]|[01]?[0-9]`}, {"MINUTE", `[0-5][0-9]`}, {"SECOND", 
`(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?`}, {"TIMEZONE", `Z%{HOUR}:%{MINUTE}`}, // {"DATE", "%{YEAR:year}-%{MONTHNUM2:month}-%{MONTHDAY:day}"}, {"TIME", "%{HOUR:hour}:%{MINUTE:min}:%{SECOND:sec}"}, // the pattern {"MAIN", "%{DATE:date}T%{TIME:time}%{TIMEZONE:tz}"}, } { if err = h.AddPattern(np.name, np.pattern); err != nil { b.Skip("error adding pattern:", err) } } b.StartTimer() for i := 0; i < b.N; i++ { if globalMap, err = h.Parse("%{MAIN}", rfc3339Testee); err != nil { b.Skip("parsing error:", err) } } b.ReportAllocs() } // go test -v -run the_difference func Test_the_difference(t *testing.T) { t.Log(`show the difference between logrusorgru/grokky and vjeantet/grok pattern '%{NUM:one} %{NUMBERS}' where NUMBERS is '%{NUM:one} %{NUM:two}' and NUM is '\d' (single number) Input is: '1 2 3'`) const input = "1 2 3" t.Run("logrusorgru/grokky", func(t *testing.T) { h := New() h.Add("NUM", `\d`) h.Add("NUMBERS", "%{NUM:one} %{NUM:two}") h.Add("RES", "%{NUM:one} %{NUMBERS}") p, err := h.Get("RES") if err != nil { t.Fatal(err) } t.Log("result is:", p.Parse(input)) }) t.Run("vjeantet/grok", func(t *testing.T) { h, err := grok.NewWithConfig(&grok.Config{ NamedCapturesOnly: true, SkipDefaultPatterns: true, }) if err != nil { t.Skip("error:", err) } h.AddPattern("NUM", `\d`) h.AddPattern("NUMBERS", "%{NUM:one} %{NUM:two}") h.AddPattern("RES", "%{NUM:one} %{NUMBERS}") mss, err := h.Parse("%{RES}", input) if err != nil { t.Fatal(err) } t.Log("result is:", mss) }) // Input is "1 2 3", output of this test: // // --- PASS: Test_the_difference/logrusorgru/grokky (0.00s) // bench_test.go:147: result is: map[one:1 two:3] // --- PASS: Test_the_difference/vjeantet/grok (0.00s) // bench_test.go:165: result is: map[one:2 two:3] // // E.g. for pattern %{one:\d %{one: \d, two: \d}}, // grokky matches 1 and 3 (closer to top, sicne the // second "one" is nested), and the grok returns // 2 and 3 (closer to tail), and nesting level doesn't // matter. 
} type testParseFunc func(string, string) (map[string]string, error) var testGrokkyParse = func() testParseFunc { type Pair struct { Name string Pattern string } patterns := []Pair{ {"LOGLEVEL", `([Aa]lert|ALERT|[Tt]race|TRACE|[Dd]ebug|DEBUG|[Nn]otice|NOTICE|[Ii]nfo|INFO|[Ww]arn?(?:ing)?|WARN?(?:ING)?|[Ee]rr?(?:or)?|ERR?(?:OR)?|[Cc]rit?(?:ical)?|CRIT?(?:ICAL)?|[Ff]atal|FATAL|[Ss]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?)`}, {"USERNAME", `[a-zA-Z0-9._-]+`}, {"HOSTNAME", `\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b)`}, {"USER", `%{USERNAME}`}, {"EMAILLOCALPART", `[a-zA-Z][a-zA-Z0-9_.+-=:]+`}, {"EMAILADDRESS", `%{EMAILLOCALPART}@%{HOSTNAME}`}, {"HTTPDUSER", `%{EMAILADDRESS}|%{USER}`}, {"INT", `(?:[+-]?(?:[0-9]+))`}, {"BASE10NUM", `([+-]?(?:[0-9]+(?:\.[0-9]+)?)|\.[0-9]+)`}, {"NUMBER", `(?:%{BASE10NUM})`}, {"BASE16NUM", `(0[xX]?[0-9a-fA-F]+)`}, {"POSINT", `\b(?:[1-9][0-9]*)\b`}, {"NONNEGINT", `\b(?:[0-9]+)\b`}, {"WORD", `\b\w+\b`}, {"NOTSPACE", `\S+`}, {"SPACE", `\s*`}, {"DATA", `.*?`}, {"GREEDYDATA", `.*`}, {"QUOTEDSTRING", `"([^"\\]*(\\.[^"\\]*)*)"|\'([^\'\\]*(\\.[^\'\\]*)*)\'`}, {"UUID", `[A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}`}, {"CISCOMAC", `(?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4})`}, {"WINDOWSMAC", `(?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2})`}, {"COMMONMAC", `(?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2})`}, {"MAC", `(?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC})`}, {"IPV6", 
`((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?`}, {"IPV4", `(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)`}, {"IP", `(?:%{IPV6}|%{IPV4})`}, {"HOST", `%{HOSTNAME}`}, {"IPORHOST", `(?:%{IP}|%{HOSTNAME})`}, {"HOSTPORT", `%{IPORHOST}:%{POSINT}`}, {"UNIXPATH", `(/[\w_%!$@:.,-]?/?)(\S+)?`}, {"WINPATH", `([A-Za-z]:|\\)(?:\\[^\\?*]*)+`}, {"PATH", `(?:%{UNIXPATH}|%{WINPATH})`}, {"TTY", `(?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+))`}, {"URIPROTO", `[A-Za-z]+(\+[A-Za-z+]+)?`}, {"URIHOST", `%{IPORHOST}(?::%{POSINT:port})?`}, {"URIPATH", `(?:/[A-Za-z0-9$.+!*'(){},~:;=@#%_\-]*)+`}, {"URIPARAM", `\?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]*`}, {"URIPATHPARAM", `%{URIPATH}(?:%{URIPARAM})?`}, {"URI", `%{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})?`}, {"MONTH", 
`\b(?:Jan(?:uary|uar)?|Feb(?:ruary|ruar)?|M(?:a|ä)?r(?:ch|z)?|Apr(?:il)?|Ma(?:y|i)?|Jun(?:e|i)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|O(?:c|k)?t(?:ober)?|Nov(?:ember)?|De(?:c|z)(?:ember)?)\b`}, {"MONTHNUM", `(?:0?[1-9]|1[0-2])`}, {"MONTHNUM2", `(?:0[1-9]|1[0-2])`}, {"MONTHDAY", `(?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])`}, {"DAY", `(?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)`}, {"YEAR", `(\d\d){1,2}`}, {"HOUR", `(?:2[0123]|[01]?[0-9])`}, {"MINUTE", `(?:[0-5][0-9])`}, {"SECOND", `(?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?)`}, {"TIME", `([^0-9]?)%{HOUR}:%{MINUTE}(?::%{SECOND})([^0-9]?)`}, {"DATE_US", `%{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR}`}, {"DATE_EU", `%{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR}`}, {"ISO8601_TIMEZONE", `(?:Z|[+-]%{HOUR}(?::?%{MINUTE}))`}, {"ISO8601_SECOND", `(?:%{SECOND}|60)`}, {"TIMESTAMP_ISO8601", `%{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?`}, {"DATE", `%{DATE_US}|%{DATE_EU}`}, {"DATESTAMP", `%{DATE}[- ]%{TIME}`}, {"TZ", `(?:[PMCE][SD]T|UTC)`}, {"DATESTAMP_RFC822", `%{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}`}, {"DATESTAMP_RFC2822", `%{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE}`}, {"DATESTAMP_OTHER", `%{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR}`}, {"DATESTAMP_EVENTLOG", `%{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND}`}, {"HTTPDERROR_DATE", `%{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{YEAR}`}, {"SYSLOGTIMESTAMP", `%{MONTH} +%{MONTHDAY} %{TIME}`}, {"PROG", `[\x21-\x5a\x5c\x5e-\x7e]+`}, {"SYSLOGPROG", `%{PROG:program}(?:\[%{POSINT:pid}\])?`}, {"SYSLOGHOST", `%{IPORHOST}`}, {"SYSLOGFACILITY", `<%{NONNEGINT:facility}.%{NONNEGINT:priority}>`}, {"HTTPDATE", `%{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT}`}, {"QS", `%{QUOTEDSTRING}`}, {"SYSLOGBASE", `%{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}:`}, {"COMMONAPACHELOG", `%{IPORHOST:clientip} %{HTTPDUSER:ident} %{USER:auth} 
\[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" %{NUMBER:response} (?:%{NUMBER:bytes}|-)`}, {"COMBINEDAPACHELOG", `%{COMMONAPACHELOG} %{QS:referrer} %{QS:agent}`}, {"HTTPD20_ERRORLOG", `\[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg}`}, {"HTTPD24_ERRORLOG", `\[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel}\] \[pid %{POSINT:pid}:tid %{NUMBER:tid}\]( \(%{POSINT:proxy_errorcode}\)%{DATA:proxy_errormessage}:)?( \[client %{IPORHOST:client}:%{POSINT:clientport}\])? %{DATA:errorcode}: %{GREEDYDATA:message}`}, {"HTTPD_ERRORLOG", `%{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG}`}, } h := New() for _, p := range patterns { h.Add(p.Name, p.Pattern) } p, _ := h.Get("COMBINEDAPACHELOG") return func(name string, input string) (m map[string]string, err error) { m = p.Parse(input) return } }() var testGrokParse = func() testParseFunc { g, _ := grok.NewWithConfig(&grok.Config{ NamedCapturesOnly: true, }) return g.Parse }() func Benchmark_grokkyVsGrokApacheLog(b *testing.B) { for _, np := range []struct { name string parseFunc testParseFunc }{ {"grokky", testGrokkyParse}, {"grok", testGrokParse}, } { b.Run(np.name, func(b *testing.B) { for i := 0; i < b.N; i++ { m, err := np.parseFunc("%{COMBINEDAPACHELOG}", `127.0.0.1 - - [02/Aug/2017:22:58:13 +0800] "GET / HTTP/1.1" 200 612 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:52.0) Gecko/20100101 Firefox/52.0" "-"`) if len(m) == 0 { b.Fatal(err) } m, err = np.parseFunc("%{COMBINEDAPACHELOG}", `....`) if len(m) != 0 { b.Fatal(err, m) } } b.ReportAllocs() }) } } // one cycle is 3 log lines // ------------------------ // // $ go test -bench simpleNginxAccessLog -benchtime=1m // Benchmark_simpleNginxAccessLog-4 3000000 26505 ns/op 1872 B/op 12 allocs/op // PASS // ok github.com/logrusorgru/grokky 107.686s // so, a second is 1*1000*1000*1000, thus 26505ns/op is 37728op/s, // where 
// Benchmark_simpleNginxAccessLog measures Parse for a hand-written access-log
// pattern. Each iteration parses one of three sample lines, chosen
// round-robin, and fails the benchmark unless exactly 12 named values come
// back (one per name used in the "nginx" pattern below).
func Benchmark_simpleNginxAccessLog(b *testing.B) {
	// https://play.golang.org/p/XKtY84Uicf
	// using simple regular expression
	// (?P[^\s]+)\s\-\s(?P[^\s]*)\s*\-\s\[(?P.+)\]\s\"(?P[^"]*)\"\s(?P[^\s]+)\s(?P[^\s]+)\s\"(?P[^"]+)\"\s\"(?P[^"]+)\"

	// Pattern registration and compilation are excluded from the timing.
	b.StopTimer()

	h := New()
	h.Must("NSS", `[^\s]*`) // zero or more non-space characters
	h.Must("NS", `[^\s]+`)  // one or more non-space characters
	h.Must("NLB", `[^\]]+`) // one or more characters other than ']' (a right bracket)
	h.Must("NQS", `[^"]*`)  // zero or more characters other than a double quote (not referenced by "nginx")
	h.Must("NQ", `[^"]+`)   // one or more characters other than a double quote
	h.Must("A", `.*`)       // anything

	h.Must("nginx", `%{NS:clientip}\s%{NSS:ident}\s%{NSS:auth}`+
		`\s\[`+
		`%{NLB:timestamp}\]\s\"`+
		`%{NS:verb}\s`+
		`%{NSS:request}\s`+
		`HTTP/%{NS:httpversion}\"\s`+
		`%{NS:response}\s`+
		`%{NS:bytes}\s\"`+
		`%{NQ:referrer}\"\s\"`+
		`%{NQ:agent}\"`+
		`%{A:blob}`)

	nginx, err := h.Get("nginx")
	if err != nil {
		b.Fatal(err)
	}

	// Sample input lines.
	lines := []string{
		`66.249.65.159 - - [06/Nov/2014:19:10:38 +0600] ` +
			`"GET /news/53f8d72920ba2744fe873ebc.html HTTP/1.1" ` +
			`404 177 "-" ` +
			`"Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X)` +
			` AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0` +
			` Mobile/10A5376e Safari/8536.25 (compatible; Googlebot/2.1;` +
			` +http://www.google.com/bot.html)"`,
		`66.249.65.3 - - [06/Nov/2014:19:11:24 +0600] ` +
			`"GET /?q=%E0%A6%AB%E0%A6%BE%E0%A7%9F%E0%A6%BE%E0%A6%B0 ` +
			`HTTP/1.1" ` +
			`200 4223 "-" ` +
			`"Mozilla/5.0 (compatible; Googlebot/2.1; ` +
			`+http://www.google.com/bot.html)"`,
		`66.249.65.62 - - [06/Nov/2014:19:12:14 +0600] ` +
			`"GET /?q=%E0%A6%A6%E0%A7%8B%E0%A7%9F%E0%A6%BE HTTP/1.1" ` +
			`200 4356 "-" ` +
			`"Mozilla/5.0 (compatible; Googlebot/2.1; ` +
			`+http://www.google.com/bot.html)"`,
	}

	b.StartTimer()
	for i := 0; i < b.N; i++ {
		// The result goes to a package-level variable so the call cannot be
		// optimized away.
		globalMap = nginx.Parse(lines[i%len(lines)])
		if len(globalMap) != 12 {
			b.Fatal(globalMap)
		}
	}
	b.ReportAllocs()
}
// slice-4 200000 7508 ns/op 416 B/op 2 allocs/op // func Benchmark_parse_vs_findStringSubmatch(b *testing.B) { b.StopTimer() h := New() h.Must("NSS", `[^\s]*`) // not a space * h.Must("NS", `[^\s]+`) // not a space + h.Must("NLB", `[^\]]+`) // not a left bracket + h.Must("NQS", `[^"]*`) // not a quote * h.Must("NQ", `[^"]+`) // not a double quotes + h.Must("A", `.*`) // all h.Must("nginx", `%{NS:clientip}\s%{NSS:ident}\s%{NSS:auth}`+ `\s\[`+ `%{NLB:timestamp}\]\s\"`+ `%{NS:verb}\s`+ `%{NSS:request}\s`+ `HTTP/%{NS:httpversion}\"\s`+ `%{NS:response}\s`+ `%{NS:bytes}\s\"`+ `%{NQ:referrer}\"\s\"`+ `%{NQ:agent}\"`+ `%{A:blob}`) nginx, err := h.Get("nginx") if err != nil { b.Fatal(err) } lines := []string{ `66.249.65.159 - - [06/Nov/2014:19:10:38 +0600] ` + `"GET /news/53f8d72920ba2744fe873ebc.html HTTP/1.1" ` + `404 177 "-" ` + `"Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X)` + ` AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0` + ` Mobile/10A5376e Safari/8536.25 (compatible; Googlebot/2.1;` + ` +http://www.google.com/bot.html)"`, `66.249.65.3 - - [06/Nov/2014:19:11:24 +0600] ` + `"GET /?q=%E0%A6%AB%E0%A6%BE%E0%A7%9F%E0%A6%BE%E0%A6%B0 ` + `HTTP/1.1" ` + `200 4223 "-" ` + `"Mozilla/5.0 (compatible; Googlebot/2.1; ` + `+http://www.google.com/bot.html)"`, `66.249.65.62 - - [06/Nov/2014:19:12:14 +0600] ` + `"GET /?q=%E0%A6%A6%E0%A7%8B%E0%A7%9F%E0%A6%BE HTTP/1.1" ` + `200 4356 "-" ` + `"Mozilla/5.0 (compatible; Googlebot/2.1; ` + `+http://www.google.com/bot.html)"`, } b.Run("map", func(b *testing.B) { for i := 0; i < b.N; i++ { var vals = nginx.Parse(lines[i%len(lines)]) globalString = vals["clientip"] globalString = vals["ident"] globalString = vals["auth"] globalString = vals["timestamp"] globalString = vals["verb"] globalString = vals["request"] globalString = vals["httpversion"] globalString = vals["response"] globalString = vals["bytes"] globalString = vals["referrer"] globalString = vals["agent"] globalString = vals["blob"] /* b.Log("clientip:", 
vals["clientip"]) b.Log("ident:", vals["ident"]) b.Log("auth:", vals["auth"]) b.Log("timestamp:", vals["timestamp"]) b.Log("verb:", vals["verb"]) b.Log("request:", vals["request"]) b.Log("httpversion:", vals["httpversion"]) b.Log("response:", vals["response"]) b.Log("bytes:", vals["bytes"]) b.Log("referrer:", vals["referrer"]) b.Log("agent:", vals["agent"]) b.Log("blob:", vals["blob"]) b.Fatal("test fatal") */ } b.ReportAllocs() }) b.Run("slice", func(b *testing.B) { for i := 0; i < b.N; i++ { var vals = nginx.FindStringSubmatch(lines[i%len(lines)]) if len(vals) < 13 { b.Fatal("shot input") } globalString = vals[1] globalString = vals[2] globalString = vals[3] globalString = vals[4] globalString = vals[5] globalString = vals[6] globalString = vals[7] globalString = vals[8] globalString = vals[9] globalString = vals[10] globalString = vals[11] globalString = vals[12] /* b.Log("clientip:", vals[1]) b.Log("ident:", vals[2]) b.Log("auth:", vals[3]) b.Log("timestamp:", vals[4]) b.Log("verb:", vals[5]) b.Log("request:", vals[6]) b.Log("httpversion:", vals[7]) b.Log("response:", vals[8]) b.Log("bytes:", vals[9]) b.Log("referrer:", vals[10]) b.Log("agent:", vals[11]) b.Log("blob:", vals[12]) b.Fatal("test fatal") */ } b.ReportAllocs() }) } grokky-0.1.0/example_test.go000066400000000000000000000024201417222633300160400ustar00rootroot00000000000000// // Copyright (c) 2016-2017 Konstanin Ivanov . // All rights reserved. This program is free software. It comes without // any warranty, to the extent permitted by applicable law. You can // redistribute it and/or modify it under the terms of the Do What // The Fuck You Want To Public License, Version 2, as published by // Sam Hocevar. See LICENSE file for more details or see below. 
// // // DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE // Version 2, December 2004 // // Copyright (C) 2004 Sam Hocevar // // Everyone is permitted to copy and distribute verbatim or modified // copies of this license document, and changing it is allowed as long // as the name is changed. // // DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE // TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION // // 0. You just DO WHAT THE FUCK YOU WANT TO. // package grokky import ( "fmt" "log" ) func ExamplePattern_Parse() { h := New() h.Add("WORD", `\w+`) h.Add("NUMBER", `\d+`) p, err := h.Compile("%{WORD:name}/%{NUMBER:age}") if err != nil { log.Fatal(err) } result := p.Parse("Alice/15") fmt.Println("Name:", result["name"]) fmt.Println("Age:", result["age"]) // Output: // Name: Alice // Age: 15 } grokky-0.1.0/grok.go000066400000000000000000000146411417222633300143200ustar00rootroot00000000000000// // Copyright (c) 2016-2017 Konstanin Ivanov . // All rights reserved. This program is free software. It comes without // any warranty, to the extent permitted by applicable law. You can // redistribute it and/or modify it under the terms of the Do What // The Fuck You Want To Public License, Version 2, as published by // Sam Hocevar. See LICENSE file for more details or see below. // // // DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE // Version 2, December 2004 // // Copyright (C) 2004 Sam Hocevar // // Everyone is permitted to copy and distribute verbatim or modified // copies of this license document, and changing it is allowed as long // as the name is changed. // // DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE // TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION // // 0. You just DO WHAT THE FUCK YOU WANT TO. // // Package grokky is a pure Golang Grok-like patterns library. This can // help you to parse log files and other. This is based on RE2 regexp // that much more faster then Oniguruma. 
The library disigned for creating // many patterns and using it many times. The behavior and capabilities // are slightly different from the original library. The golas of the // library are: (1) simplicity, (2) performance, (3) ease of use. package grokky // http://play.golang.org/p/vb18r_OZkK import ( "bufio" "errors" "fmt" "os" "regexp" "strings" ) var patternRegexp = regexp.MustCompile(`\%\{(\w+)(\:(\w+))?}`) var ( // ErrEmptyName arises when pattern name is an empty string ErrEmptyName = errors.New("an empty name") // ErrEmptyExpression arises when expression is an empty string ErrEmptyExpression = errors.New("an empty expression") // ErrAlreadyExist arises when pattern with given name alrady exists ErrAlreadyExist = errors.New("the pattern already exist") // ErrNotExist arises when pattern with given name doesn't exists ErrNotExist = errors.New("pattern doesn't exist") ) // helpers func split(s string) (name, sem string) { ss := patternRegexp.FindStringSubmatch(s) if len(ss) >= 2 { name = ss[1] } if len(ss) >= 4 { sem = ss[3] } return } func wrap(s string) string { return "(" + s + ")" } // host // Host is a patterns collection. Feel free to // delete the Host after all patterns (that you need) // are created. Think of it as a kind of factory. type Host map[string]string // New returns new empty host func New() Host { return make(Host) } // Add a new pattern to the Host. If pattern with given name // already exists the ErrAlreadyExists will be retuned. 
// compileExternal compiles expr, expanding every %{NAME} / %{NAME:semantic}
// reference with the expression stored in the Host under NAME.
//
// Each expanded reference is wrapped in its own capturing group. The
// returned Pattern maps every semantic name to the index of the capturing
// group holding its value: names given directly in expr, plus names
// inherited from the referenced patterns unless expr itself declares the
// same name.
//
// It returns an error if a referenced pattern is not in the Host or if the
// resulting regular expression does not compile.
func (h Host) compileExternal(expr string) (*Pattern, error) {
	// find all pattern references in the expression
	subs := patternRegexp.FindAllString(expr, -1)
	// set of semantic names declared directly in this expression
	// (it also receives "" for references without a semantic)
	ts := make(map[string]struct{})
	// check that every referenced pattern exists in this Host
	for _, s := range subs {
		name, sem := split(s)
		if _, ok := h[name]; !ok {
			return nil, fmt.Errorf("the '%s' pattern doesn't exist", name)
		}
		ts[sem] = struct{}{}
	}
	// no references: the expression is a plain regular expression and the
	// resulting Pattern has no named semantics
	if len(subs) == 0 {
		r, err := regexp.Compile(expr)
		if err != nil {
			return nil, err
		}
		p := &Pattern{Regexp: r}
		return p, nil
	}
	// literal fragments between the references; there is always exactly
	// one more fragment than there are references
	spl := patternRegexp.Split(expr, -1)
	// stitch fragments and expanded references back together, tracking
	// capturing-group indexes as we go
	msi := make(map[string]int) // semantic name -> capturing group index
	order := 1                  // index the next capturing group will get
	var res string
	for i := 0; i < len(spl)-1; i++ {
		// fragment before the i-th reference: skip over its own groups
		splPart := spl[i]
		order += capCount(splPart)
		// the i-th reference: compile the referenced pattern
		sub := subs[i]
		subName, subSem := split(sub)
		p, err := h.compile(subName)
		if err != nil {
			return nil, err
		}
		sub = p.String()
		subNumSubexp := p.NumSubexp()
		subNumSubexp++ // plus the wrapping group added just below
		sub = wrap(sub)
		if subSem != "" {
			// the wrapping group itself is the one at index order
			msi[subSem] = order
		}
		res += splPart + sub
		// inherit nested semantics, shifted by the wrapper's index; names
		// declared directly in this expression take precedence
		for k, v := range p.s {
			if _, ok := ts[k]; !ok {
				msi[k] = order + v
			}
		}
		// advance past the wrapper and all groups inside it
		order += subNumSubexp
	}
	// trailing fragment after the last reference
	res += spl[len(spl)-1]
	r, err := regexp.Compile(res)
	if err != nil {
		return nil, err
	}
	p := &Pattern{Regexp: r}
	p.s = msi
	return p, nil
}
// capCount reports how many capturing groups are opened inside the regular
// expression fragment in. The fragment does not have to be a complete
// expression: groups may be opened here and closed in a later fragment.
//
// A "(" opens a capturing group unless it is
//   - escaped ("\(") or inside a \Q...\E quoted run,
//   - inside a character class ("[(]"),
//   - the start of a flag group ("(?i)", "(?s-m:...)", "(?:...)").
//
// Named groups ("(?P<name>...)" and "(?<name>...)") are capturing.
//
// The previous implementation subtracted regexp match counts from the number
// of "(" bytes; its flag-group expression treated the "(" as optional and so
// also matched ordinary text such as the "ms)" in "(\d+ items)", producing
// counts that were too low.
func capCount(in string) int {
	var (
		count   int
		inClass bool
	)
	for i := 0; i < len(in); i++ {
		c := in[i]
		switch {
		case c == '\\':
			if !inClass && strings.HasPrefix(in[i:], `\Q`) {
				// Everything up to the matching \E is literal text.
				end := strings.Index(in[i+2:], `\E`)
				if end < 0 {
					return count // quoted to the end of the fragment
				}
				i += 2 + end + 1 // land on the 'E'; the loop steps past it
				continue
			}
			i++ // skip the escaped byte
		case inClass:
			switch {
			case c == ']':
				inClass = false
			case strings.HasPrefix(in[i:], "[:"):
				// POSIX class such as [:alpha:]: its ']' does not close
				// the surrounding character class.
				if end := strings.Index(in[i+2:], ":]"); end >= 0 {
					i += 2 + end + 1
				}
			}
		case c == '[':
			inClass = true
			// A ']' placed first (optionally after '^') is a literal.
			j := i + 1
			if j < len(in) && in[j] == '^' {
				j++
			}
			if j < len(in) && in[j] == ']' {
				i = j
			}
		case c == '(':
			if opensCapture(in[i+1:]) {
				count++
			}
		}
	}
	return count
}

// opensCapture reports whether a group whose text after the opening "(" is
// rest is a capturing group.
func opensCapture(rest string) bool {
	if !strings.HasPrefix(rest, "?") {
		return true // plain "(...)"
	}
	// "(?P<name>" and "(?<name>" capture; every other "(?" form is a flag
	// group and does not.
	return strings.HasPrefix(rest, "?P<") || strings.HasPrefix(rest, "?<")
}
It comes without // any warranty, to the extent permitted by applicable law. You can // redistribute it and/or modify it under the terms of the Do What // The Fuck You Want To Public License, Version 2, as published by // Sam Hocevar. See LICENSE file for more details or see below. // // // DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE // Version 2, December 2004 // // Copyright (C) 2004 Sam Hocevar // // Everyone is permitted to copy and distribute verbatim or modified // copies of this license document, and changing it is allowed as long // as the name is changed. // // DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE // TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION // // 0. You just DO WHAT THE FUCK YOU WANT TO. // package grokky //go test -coverprofile cover.out && go tool cover -html=cover.out -o cover.html import ( "bufio" "io/ioutil" "os" "testing" ) const ( patternsTest = "patterns_pass.txt" patternsFailTest = "patterns_fail.txt" ) func TestNew(t *testing.T) { h := New() if len(h) != 0 { t.Error("New returns non-empty host") } if h == nil { t.Error("New returns nil") } } func testEmptyName(t *testing.T, h Host) { l := len(h) if err := h.Add("", "expr"); err == nil { t.Error("(Host).Add is missing ErrEmptyName") } else if err != ErrEmptyName { t.Error("(Host).Add returns non-ErrEmptyName error") } if len(h) > l { t.Error("added bad patterns") } } func testEmptyExpression(t *testing.T, h Host) { l := len(h) if err := h.Add("zorro", ""); err == nil { t.Error("(Host).Add is missing ErrEmptyExpression") } else if err != ErrEmptyExpression { t.Error("(Host).Add returns non-ErrEmptyExpression error") } if len(h) > l { t.Error("added bad patterns") } } func testNormalPattern(t *testing.T, h Host) { l := len(h) if err := h.Add("DIGIT", `\d`); err != nil { t.Errorf("(Host).Add returns non-nil error: %v", err) } if len(h) != l+1 { t.Error("wrong patterns count") } } // must be invoked direct after testNormalPattern func testAlreadyExists(t *testing.T, h 
Host) { l := len(h) if err := h.Add("DIGIT", `[+-](0x)?\d`); err == nil { t.Error("(Host).Add is missing ErrAlreadyExist") } else if err != ErrAlreadyExist { t.Error("(Host).Add returns non-ErrAlreadyExist error") } if len(h) != l { t.Error("wrong patterns count") } } func TestHost_Add(t *testing.T) { h := New() testEmptyName(t, h) testEmptyExpression(t, h) testNormalPattern(t, h) testAlreadyExists(t, h) if err := h.Add("BAD", `(?![0-5])`); err == nil { t.Error("(Host).Add is missing any bad-regexp error") } if len(h) != 1 { t.Error("wrong patterns count") } if err := h.Add("TWODIG", `%{DIGIT}-%{DIGIT}`); err != nil { t.Errorf("(Host).Add returns non-nil error: %v", err) } if len(h) != 2 { t.Error("wrong patterns count") } if err := h.Add("THREE", `%{NOT}-%{EXIST}`); err == nil { t.Errorf("(Host).Add is missing the-pattern-not-exist error") } if len(h) != 2 { t.Error("wrong patterns count") } if err := h.Add("FOUR", `%{DIGIT:one}-%{DIGIT:two}`); err != nil { t.Errorf("(Host).Add returns non-nil error: %v", err) } if len(h) != 3 { t.Error("wrong patterns count") } if err := h.Add("FIVE", `(?!\d)%{DIGIT}(?!\d)`); err == nil { t.Errorf("(Host).Add is missing an error of regexp") } if len(h) != 3 { t.Error("wrong patterns count") } if err := h.Add("SIX", `%{FOUR:four}-%{DIGIT:six}`); err != nil { t.Errorf("(Host).Add returns non-nil error") } if len(h) != 4 { t.Error("wrong patterns count") } } func TestHost_Compile(t *testing.T) { h := New() if _, err := h.Compile(""); err == nil { t.Error("(Host).Compile missing ErrEmptyExpression") } else if err != ErrEmptyExpression { t.Error("(Host).Compile returns non-ErrEmptyExpression error") } if len(h) != 0 { t.Error("(Host).Compile: (bad) pattern added to host") } if p, err := h.Compile(`\d+`); err != nil { t.Error("(Host).Compile error:", err) } else if p == nil { t.Error("(Host).Compile returns nil (and no errors)") } if len(h) != 0 { t.Error("(Host).Compile: pattern added to host") } } func TestHost_Get(t *testing.T) { h 
// tempFile creates a temporary file holding bufio.MaxScanTokenSize+1 zero
// bytes (a single "line" longer than a default bufio.Scanner accepts) and
// returns its path. The caller is responsible for removing the file.
//
// If the file cannot be created or written, the calling test is skipped;
// a partially written file is removed first so nothing is left behind.
func tempFile(t *testing.T) (name string) {
	f, err := ioutil.TempFile("", "")
	if err != nil {
		// Skip stops the calling goroutine, so f is never touched below.
		t.Skip("unable to create temporary file")
	}
	name = f.Name()

	_, writeErr := f.Write(make([]byte, bufio.MaxScanTokenSize+1))
	// Close explicitly: a failure here also means the content is unreliable.
	closeErr := f.Close()
	if writeErr != nil || closeErr != nil {
		// Best-effort cleanup; the caller never learns the name when the
		// test is skipped, so nobody else could remove the file.
		_ = os.Remove(name)
		t.Skip("unable to write to temporary file")
	}
	return name
}
// mssTest reports whether got holds exactly the same key/value pairs as
// expect. A nil map and an empty map are considered equal.
func mssTest(expect, got map[string]string) bool {
	if len(expect) != len(got) {
		return false
	}
	for key, want := range expect {
		// The presence check matters: without it a missing key reads as ""
		// and would wrongly match an expected empty value.
		if have, ok := got[key]; !ok || have != want {
			return false
		}
	}
	return true
}
!mssTest(map[string]string{"one": "1", "two": "1-2"}, p.Parse("1-2")) { t.Error("bad result") } } func TestPattern_nestedGroups(t *testing.T) { h := New() if err := h.Add("ONE", `\d`); err != nil { t.Error(err) } if err := h.Add("TWO", `(?:%{ONE:one})-(?:%{ONE:two})?`); err != nil { t.Error(err) } p, err := h.Get("TWO") if err != nil { t.Error(err) } mss := p.Parse("1-2") if len(mss) != 2 || mss["one"] != "1" || mss["two"] != "2" { t.Error("bad result") } mss = p.Parse("1-") if len(mss) != 2 || mss["one"] != "1" || mss["two"] != "" { t.Error("bad result") } } func TestPattern_Names(t *testing.T) { h := New() if err := h.Add("ONE", `\d`); err != nil { t.Error(err) } if err := h.Add("TWO", `%{ONE:one}-%{ONE:two}`); err != nil { t.Error(err) } if err := h.Add("THREE", `%{ONE:zero}-%{TWO:three}`); err != nil { t.Error(err) } p, err := h.Get("THREE") if err != nil { t.Fatal(err) } ss := p.Names() if len(ss) != 4 { t.Error("Names returns wrong values count") } for _, v := range ss { if !(v == "one" || v == "two" || v == "zero" || v == "three") { t.Error("Names returns wrong values:", v) } } } grokky-0.1.0/patterns/000077500000000000000000000000001417222633300146615ustar00rootroot00000000000000grokky-0.1.0/patterns/aws000066400000000000000000000022541417222633300154010ustar00rootroot00000000000000S3_REQUEST_LINE (?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest}) S3_ACCESS_LOG %{WORD:owner} %{NOTSPACE:bucket} \[%{HTTPDATE:timestamp}\] %{IP:clientip} %{NOTSPACE:requester} %{NOTSPACE:request_id} %{NOTSPACE:operation} %{NOTSPACE:key} (?:"%{S3_REQUEST_LINE}"|-) (?:%{INT:response:int}|-) (?:-|%{NOTSPACE:error_code}) (?:%{INT:bytes:int}|-) (?:%{INT:object_size:int}|-) (?:%{INT:request_time_ms:int}|-) (?:%{INT:turnaround_time_ms:int}|-) (?:%{QS:referrer}|-) (?:"?%{QS:agent}"?|-) (?:-|%{NOTSPACE:version_id}) ELB_URIPATHPARAM %{URIPATH:path}(?:%{URIPARAM:params})? 
ELB_URI %{URIPROTO:proto}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST:urihost})?(?:%{ELB_URIPATHPARAM})? ELB_REQUEST_LINE (?:%{WORD:verb} %{ELB_URI:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest}) ELB_ACCESS_LOG %{TIMESTAMP_ISO8601:timestamp} %{NOTSPACE:elb} %{IP:clientip}:%{INT:clientport:int} (?:(%{IP:backendip}:?:%{INT:backendport:int})|-) %{NUMBER:request_processing_time:float} %{NUMBER:backend_processing_time:float} %{NUMBER:response_processing_time:float} %{INT:response:int} %{INT:backend_response:int} %{INT:received_bytes:int} %{INT:bytes:int} "%{ELB_REQUEST_LINE}"grokky-0.1.0/patterns/bacula000066400000000000000000000113261417222633300160360ustar00rootroot00000000000000BACULA_TIMESTAMP %{MONTHDAY}-%{MONTH} %{HOUR}:%{MINUTE} BACULA_HOST [a-zA-Z0-9-]+ BACULA_VOLUME %{USER} BACULA_DEVICE %{USER} BACULA_DEVICEPATH %{UNIXPATH} BACULA_CAPACITY %{INT}{1,3}(,%{INT}{3})* BACULA_VERSION %{USER} BACULA_JOB %{USER} BACULA_LOG_MAX_CAPACITY User defined maximum volume capacity %{BACULA_CAPACITY} exceeded on device \"%{BACULA_DEVICE:device}\" \(%{BACULA_DEVICEPATH}\) BACULA_LOG_END_VOLUME End of medium on Volume \"%{BACULA_VOLUME:volume}\" Bytes=%{BACULA_CAPACITY} Blocks=%{BACULA_CAPACITY} at %{MONTHDAY}-%{MONTH}-%{YEAR} %{HOUR}:%{MINUTE}. BACULA_LOG_NEW_VOLUME Created new Volume \"%{BACULA_VOLUME:volume}\" in catalog. BACULA_LOG_NEW_LABEL Labeled new Volume \"%{BACULA_VOLUME:volume}\" on device \"%{BACULA_DEVICE:device}\" \(%{BACULA_DEVICEPATH}\). BACULA_LOG_WROTE_LABEL Wrote label to prelabeled Volume \"%{BACULA_VOLUME:volume}\" on device \"%{BACULA_DEVICE}\" \(%{BACULA_DEVICEPATH}\) BACULA_LOG_NEW_MOUNT New volume \"%{BACULA_VOLUME:volume}\" mounted on device \"%{BACULA_DEVICE:device}\" \(%{BACULA_DEVICEPATH}\) at %{MONTHDAY}-%{MONTH}-%{YEAR} %{HOUR}:%{MINUTE}. 
BACULA_LOG_NOOPEN \s+Cannot open %{DATA}: ERR=%{GREEDYDATA:berror} BACULA_LOG_NOOPENDIR \s+Could not open directory %{DATA}: ERR=%{GREEDYDATA:berror} BACULA_LOG_NOSTAT \s+Could not stat %{DATA}: ERR=%{GREEDYDATA:berror} BACULA_LOG_NOJOBS There are no more Jobs associated with Volume \"%{BACULA_VOLUME:volume}\". Marking it purged. BACULA_LOG_ALL_RECORDS_PRUNED All records pruned from Volume \"%{BACULA_VOLUME:volume}\"; marking it \"Purged\" BACULA_LOG_BEGIN_PRUNE_JOBS Begin pruning Jobs older than %{INT} month %{INT} days . BACULA_LOG_BEGIN_PRUNE_FILES Begin pruning Files. BACULA_LOG_PRUNED_JOBS Pruned %{INT} Jobs* for client %{BACULA_HOST:client} from catalog. BACULA_LOG_PRUNED_FILES Pruned Files from %{INT} Jobs* for client %{BACULA_HOST:client} from catalog. BACULA_LOG_ENDPRUNE End auto prune. BACULA_LOG_STARTJOB Start Backup JobId %{INT}, Job=%{BACULA_JOB:job} BACULA_LOG_STARTRESTORE Start Restore Job %{BACULA_JOB:job} BACULA_LOG_USEDEVICE Using Device \"%{BACULA_DEVICE:device}\" BACULA_LOG_DIFF_FS \s+%{UNIXPATH} is a different filesystem. Will not descend from %{UNIXPATH} into it. BACULA_LOG_JOBEND Job write elapsed time = %{DATA:elapsed}, Transfer rate = %{NUMBER} (K|M|G)? Bytes/second BACULA_LOG_NOPRUNE_JOBS No Jobs found to prune. BACULA_LOG_NOPRUNE_FILES No Files found to prune. BACULA_LOG_VOLUME_PREVWRITTEN Volume \"%{BACULA_VOLUME:volume}\" previously written, moving to end of data. BACULA_LOG_READYAPPEND Ready to append to end of Volume \"%{BACULA_VOLUME:volume}\" size=%{INT} BACULA_LOG_CANCELLING Cancelling duplicate JobId=%{INT}. BACULA_LOG_MARKCANCEL JobId %{INT}, Job %{BACULA_JOB:job} marked to be canceled. BACULA_LOG_CLIENT_RBJ shell command: run ClientRunBeforeJob \"%{GREEDYDATA:runjob}\" BACULA_LOG_VSS (Generate )?VSS (Writer)? BACULA_LOG_MAXSTART Fatal error: Job canceled because max start delay time exceeded. BACULA_LOG_DUPLICATE Fatal error: JobId %{INT:duplicate} already running. Duplicate job not allowed. 
BACULA_LOG_NOJOBSTAT Fatal error: No Job status returned from FD. BACULA_LOG_FATAL_CONN Fatal error: bsock.c:133 Unable to connect to (Client: %{BACULA_HOST:client}|Storage daemon) on %{HOSTNAME}:%{POSINT}. ERR=%{GREEDYDATA:berror} BACULA_LOG_NO_CONNECT Warning: bsock.c:127 Could not connect to (Client: %{BACULA_HOST:client}|Storage daemon) on %{HOSTNAME}:%{POSINT}. ERR=%{GREEDYDATA:berror} BACULA_LOG_NO_AUTH Fatal error: Unable to authenticate with File daemon at %{HOSTNAME}. Possible causes: BACULA_LOG_NOSUIT No prior or suitable Full backup found in catalog. Doing FULL backup. BACULA_LOG_NOPRIOR No prior Full backup Job record found. BACULA_LOG_JOB (Error: )?Bacula %{BACULA_HOST} %{BACULA_VERSION} \(%{BACULA_VERSION}\): BACULA_LOGLINE %{BACULA_TIMESTAMP:bts} %{BACULA_HOST:hostname} JobId %{INT:jobid}: (%{BACULA_LOG_MAX_CAPACITY}|%{BACULA_LOG_END_VOLUME}|%{BACULA_LOG_NEW_VOLUME}|%{BACULA_LOG_NEW_LABEL}|%{BACULA_LOG_WROTE_LABEL}|%{BACULA_LOG_NEW_MOUNT}|%{BACULA_LOG_NOOPEN}|%{BACULA_LOG_NOOPENDIR}|%{BACULA_LOG_NOSTAT}|%{BACULA_LOG_NOJOBS}|%{BACULA_LOG_ALL_RECORDS_PRUNED}|%{BACULA_LOG_BEGIN_PRUNE_JOBS}|%{BACULA_LOG_BEGIN_PRUNE_FILES}|%{BACULA_LOG_PRUNED_JOBS}|%{BACULA_LOG_PRUNED_FILES}|%{BACULA_LOG_ENDPRUNE}|%{BACULA_LOG_STARTJOB}|%{BACULA_LOG_STARTRESTORE}|%{BACULA_LOG_USEDEVICE}|%{BACULA_LOG_DIFF_FS}|%{BACULA_LOG_JOBEND}|%{BACULA_LOG_NOPRUNE_JOBS}|%{BACULA_LOG_NOPRUNE_FILES}|%{BACULA_LOG_VOLUME_PREVWRITTEN}|%{BACULA_LOG_READYAPPEND}|%{BACULA_LOG_CANCELLING}|%{BACULA_LOG_MARKCANCEL}|%{BACULA_LOG_CLIENT_RBJ}|%{BACULA_LOG_VSS}|%{BACULA_LOG_MAXSTART}|%{BACULA_LOG_DUPLICATE}|%{BACULA_LOG_NOJOBSTAT}|%{BACULA_LOG_FATAL_CONN}|%{BACULA_LOG_NO_CONNECT}|%{BACULA_LOG_NO_AUTH}|%{BACULA_LOG_NOSUIT}|%{BACULA_LOG_JOB}|%{BACULA_LOG_NOPRIOR})grokky-0.1.0/patterns/bro000066400000000000000000000041511417222633300153670ustar00rootroot00000000000000# https://www.bro.org/sphinx/script-reference/log-files.html # http.log BRO_HTTP 
%{NUMBER:ts}\t%{NOTSPACE:uid}\t%{IP:orig_h}\t%{INT:orig_p}\t%{IP:resp_h}\t%{INT:resp_p}\t%{INT:trans_depth}\t%{GREEDYDATA:method}\t%{GREEDYDATA:domain}\t%{GREEDYDATA:uri}\t%{GREEDYDATA:referrer}\t%{GREEDYDATA:user_agent}\t%{NUMBER:request_body_len}\t%{NUMBER:response_body_len}\t%{GREEDYDATA:status_code}\t%{GREEDYDATA:status_msg}\t%{GREEDYDATA:info_code}\t%{GREEDYDATA:info_msg}\t%{GREEDYDATA:filename}\t%{GREEDYDATA:bro_tags}\t%{GREEDYDATA:username}\t%{GREEDYDATA:password}\t%{GREEDYDATA:proxied}\t%{GREEDYDATA:orig_fuids}\t%{GREEDYDATA:orig_mime_types}\t%{GREEDYDATA:resp_fuids}\t%{GREEDYDATA:resp_mime_types} # dns.log BRO_DNS %{NUMBER:ts}\t%{NOTSPACE:uid}\t%{IP:orig_h}\t%{INT:orig_p}\t%{IP:resp_h}\t%{INT:resp_p}\t%{WORD:proto}\t%{INT:trans_id}\t%{GREEDYDATA:query}\t%{GREEDYDATA:qclass}\t%{GREEDYDATA:qclass_name}\t%{GREEDYDATA:qtype}\t%{GREEDYDATA:qtype_name}\t%{GREEDYDATA:rcode}\t%{GREEDYDATA:rcode_name}\t%{GREEDYDATA:AA}\t%{GREEDYDATA:TC}\t%{GREEDYDATA:RD}\t%{GREEDYDATA:RA}\t%{GREEDYDATA:Z}\t%{GREEDYDATA:answers}\t%{GREEDYDATA:TTLs}\t%{GREEDYDATA:rejected} # conn.log BRO_CONN %{NUMBER:ts}\t%{NOTSPACE:uid}\t%{IP:orig_h}\t%{INT:orig_p}\t%{IP:resp_h}\t%{INT:resp_p}\t%{WORD:proto}\t%{GREEDYDATA:service}\t%{NUMBER:duration}\t%{NUMBER:orig_bytes}\t%{NUMBER:resp_bytes}\t%{GREEDYDATA:conn_state}\t%{GREEDYDATA:local_orig}\t%{GREEDYDATA:missed_bytes}\t%{GREEDYDATA:history}\t%{GREEDYDATA:orig_pkts}\t%{GREEDYDATA:orig_ip_bytes}\t%{GREEDYDATA:resp_pkts}\t%{GREEDYDATA:resp_ip_bytes}\t%{GREEDYDATA:tunnel_parents} # files.log BRO_FILES 
%{NUMBER:ts}\t%{NOTSPACE:fuid}\t%{IP:tx_hosts}\t%{IP:rx_hosts}\t%{NOTSPACE:conn_uids}\t%{GREEDYDATA:source}\t%{GREEDYDATA:depth}\t%{GREEDYDATA:analyzers}\t%{GREEDYDATA:mime_type}\t%{GREEDYDATA:filename}\t%{GREEDYDATA:duration}\t%{GREEDYDATA:local_orig}\t%{GREEDYDATA:is_orig}\t%{GREEDYDATA:seen_bytes}\t%{GREEDYDATA:total_bytes}\t%{GREEDYDATA:missing_bytes}\t%{GREEDYDATA:overflow_bytes}\t%{GREEDYDATA:timedout}\t%{GREEDYDATA:parent_fuid}\t%{GREEDYDATA:md5}\t%{GREEDYDATA:sha1}\t%{GREEDYDATA:sha256}\t%{GREEDYDATA:extracted}grokky-0.1.0/patterns/exim000066400000000000000000000015551417222633300155540ustar00rootroot00000000000000EXIM_MSGID [0-9A-Za-z]{6}-[0-9A-Za-z]{6}-[0-9A-Za-z]{2} EXIM_FLAGS (<=|[-=>*]>|[*]{2}|==) EXIM_DATE %{YEAR:exim_year}-%{MONTHNUM:exim_month}-%{MONTHDAY:exim_day} %{TIME:exim_time} EXIM_PID \[%{POSINT}\] EXIM_QT ((\d+y)?(\d+w)?(\d+d)?(\d+h)?(\d+m)?(\d+s)?) EXIM_EXCLUDE_TERMS (Message is frozen|(Start|End) queue run| Warning: | retry time not reached | no (IP address|host name) found for (IP address|host) | unexpected disconnection while reading SMTP command | no immediate delivery: |another process is handling this message) EXIM_REMOTE_HOST (H=(%{NOTSPACE:remote_hostname} )?(\(%{NOTSPACE:remote_heloname}\) )?\[%{IP:remote_host}\]) EXIM_INTERFACE (I=\[%{IP:exim_interface}\](:%{NUMBER:exim_interface_port})) EXIM_PROTOCOL (P=%{NOTSPACE:protocol}) EXIM_MSG_SIZE (S=%{NUMBER:exim_msg_size}) EXIM_HEADER_ID (id=%{NOTSPACE:exim_header_id}) EXIM_SUBJECT (T=%{QS:exim_subject})grokky-0.1.0/patterns/firewalls000066400000000000000000000225071417222633300166020ustar00rootroot00000000000000# NetScreen firewall logs NETSCREENSESSIONLOG %{SYSLOGTIMESTAMP:date} %{IPORHOST:device} %{IPORHOST}: NetScreen device_id=%{WORD:device_id}%{DATA}: start_time=%{QUOTEDSTRING:start_time} duration=%{INT:duration} policy_id=%{INT:policy_id} service=%{DATA:service} proto=%{INT:proto} src zone=%{WORD:src_zone} dst zone=%{WORD:dst_zone} action=%{WORD:action} sent=%{INT:sent} 
rcvd=%{INT:rcvd} src=%{IPORHOST:src_ip} dst=%{IPORHOST:dst_ip} src_port=%{INT:src_port} dst_port=%{INT:dst_port} src-xlated ip=%{IPORHOST:src_xlated_ip} port=%{INT:src_xlated_port} dst-xlated ip=%{IPORHOST:dst_xlated_ip} port=%{INT:dst_xlated_port} session_id=%{INT:session_id} reason=%{GREEDYDATA:reason} #== Cisco ASA == CISCOTAG [A-Z0-9]+-%{INT}-(?:[A-Z0-9_]+) CISCOTIMESTAMP %{MONTH} +%{MONTHDAY}(?: %{YEAR})? %{TIME} CISCO_TAGGED_SYSLOG ^<%{POSINT:syslog_pri}>%{CISCOTIMESTAMP:timestamp}( %{SYSLOGHOST:sysloghost})? ?: %%{CISCOTAG:ciscotag}: # Common Particles CISCO_ACTION Built|Teardown|Deny|Denied|denied|requested|permitted|denied by ACL|discarded|est-allowed|Dropping|created|deleted CISCO_REASON Duplicate TCP SYN|Failed to locate egress interface|Invalid transport field|No matching connection|DNS Response|DNS Query|(?:%{WORD}\s*)* CISCO_DIRECTION Inbound|inbound|Outbound|outbound CISCO_INTERVAL first hit|%{INT}-second interval CISCO_XLATE_TYPE static|dynamic # ASA-1-104001 CISCOFW104001 \((?:Primary|Secondary)\) Switching to ACTIVE - %{GREEDYDATA:switch_reason} # ASA-1-104002 CISCOFW104002 \((?:Primary|Secondary)\) Switching to STANDBY - %{GREEDYDATA:switch_reason} # ASA-1-104003 CISCOFW104003 \((?:Primary|Secondary)\) Switching to FAILED\. # ASA-1-104004 CISCOFW104004 \((?:Primary|Secondary)\) Switching to OK\. 
# ASA-1-105003 CISCOFW105003 \((?:Primary|Secondary)\) Monitoring on [Ii]nterface %{GREEDYDATA:interface_name} waiting # ASA-1-105004 CISCOFW105004 \((?:Primary|Secondary)\) Monitoring on [Ii]nterface %{GREEDYDATA:interface_name} normal # ASA-1-105005 CISCOFW105005 \((?:Primary|Secondary)\) Lost Failover communications with mate on [Ii]nterface %{GREEDYDATA:interface_name} # ASA-1-105008 CISCOFW105008 \((?:Primary|Secondary)\) Testing [Ii]nterface %{GREEDYDATA:interface_name} # ASA-1-105009 CISCOFW105009 \((?:Primary|Secondary)\) Testing on [Ii]nterface %{GREEDYDATA:interface_name} (?:Passed|Failed) # ASA-2-106001 CISCOFW106001 %{CISCO_DIRECTION:direction} %{WORD:protocol} connection %{CISCO_ACTION:action} from %{IP:src_ip}/%{INT:src_port} to %{IP:dst_ip}/%{INT:dst_port} flags %{GREEDYDATA:tcp_flags} on interface %{GREEDYDATA:interface} # ASA-2-106006, ASA-2-106007, ASA-2-106010 CISCOFW106006_106007_106010 %{CISCO_ACTION:action} %{CISCO_DIRECTION:direction} %{WORD:protocol} (?:from|src) %{IP:src_ip}/%{INT:src_port}(\(%{DATA:src_fwuser}\))? (?:to|dst) %{IP:dst_ip}/%{INT:dst_port}(\(%{DATA:dst_fwuser}\))? (?:on interface %{DATA:interface}|due to %{CISCO_REASON:reason}) # ASA-3-106014 CISCOFW106014 %{CISCO_ACTION:action} %{CISCO_DIRECTION:direction} %{WORD:protocol} src %{DATA:src_interface}:%{IP:src_ip}(\(%{DATA:src_fwuser}\))? dst %{DATA:dst_interface}:%{IP:dst_ip}(\(%{DATA:dst_fwuser}\))? \(type %{INT:icmp_type}, code %{INT:icmp_code}\) # ASA-6-106015 CISCOFW106015 %{CISCO_ACTION:action} %{WORD:protocol} \(%{DATA:policy_id}\) from %{IP:src_ip}/%{INT:src_port} to %{IP:dst_ip}/%{INT:dst_port} flags %{DATA:tcp_flags} on interface %{GREEDYDATA:interface} # ASA-1-106021 CISCOFW106021 %{CISCO_ACTION:action} %{WORD:protocol} reverse path check from %{IP:src_ip} to %{IP:dst_ip} on interface %{GREEDYDATA:interface} # ASA-4-106023 CISCOFW106023 %{CISCO_ACTION:action}( protocol)? 
%{WORD:protocol} src %{DATA:src_interface}:%{DATA:src_ip}(/%{INT:src_port})?(\(%{DATA:src_fwuser}\))? dst %{DATA:dst_interface}:%{DATA:dst_ip}(/%{INT:dst_port})?(\(%{DATA:dst_fwuser}\))?( \(type %{INT:icmp_type}, code %{INT:icmp_code}\))? by access-group "?%{DATA:policy_id}"? \[%{DATA:hashcode1}, %{DATA:hashcode2}\] # ASA-4-106100, ASA-4-106102, ASA-4-106103 CISCOFW106100_2_3 access-list %{NOTSPACE:policy_id} %{CISCO_ACTION:action} %{WORD:protocol} for user '%{DATA:src_fwuser}' %{DATA:src_interface}/%{IP:src_ip}\(%{INT:src_port}\) -> %{DATA:dst_interface}/%{IP:dst_ip}\(%{INT:dst_port}\) hit-cnt %{INT:hit_count} %{CISCO_INTERVAL:interval} \[%{DATA:hashcode1}, %{DATA:hashcode2}\] # ASA-5-106100 CISCOFW106100 access-list %{NOTSPACE:policy_id} %{CISCO_ACTION:action} %{WORD:protocol} %{DATA:src_interface}/%{IP:src_ip}\(%{INT:src_port}\)(\(%{DATA:src_fwuser}\))? -> %{DATA:dst_interface}/%{IP:dst_ip}\(%{INT:dst_port}\)(\(%{DATA:src_fwuser}\))? hit-cnt %{INT:hit_count} %{CISCO_INTERVAL:interval} \[%{DATA:hashcode1}, %{DATA:hashcode2}\] # ASA-6-110002 CISCOFW110002 %{CISCO_REASON:reason} for %{WORD:protocol} from %{DATA:src_interface}:%{IP:src_ip}/%{INT:src_port} to %{IP:dst_ip}/%{INT:dst_port} # ASA-6-302010 CISCOFW302010 %{INT:connection_count} in use, %{INT:connection_count_max} most used # ASA-6-302013, ASA-6-302014, ASA-6-302015, ASA-6-302016 CISCOFW302013_302014_302015_302016 %{CISCO_ACTION:action}(?: %{CISCO_DIRECTION:direction})? %{WORD:protocol} connection %{INT:connection_id} for %{DATA:src_interface}:%{IP:src_ip}/%{INT:src_port}( \(%{IP:src_mapped_ip}/%{INT:src_mapped_port}\))?(\(%{DATA:src_fwuser}\))? to %{DATA:dst_interface}:%{IP:dst_ip}/%{INT:dst_port}( \(%{IP:dst_mapped_ip}/%{INT:dst_mapped_port}\))?(\(%{DATA:dst_fwuser}\))?( duration %{TIME:duration} bytes %{INT:bytes})?(?: %{CISCO_REASON:reason})?( \(%{DATA:user}\))? # ASA-6-302020, ASA-6-302021 CISCOFW302020_302021 %{CISCO_ACTION:action}(?: %{CISCO_DIRECTION:direction})? 
%{WORD:protocol} connection for faddr %{IP:dst_ip}/%{INT:icmp_seq_num}(?:\(%{DATA:fwuser}\))? gaddr %{IP:src_xlated_ip}/%{INT:icmp_code_xlated} laddr %{IP:src_ip}/%{INT:icmp_code}( \(%{DATA:user}\))? # ASA-6-305011 CISCOFW305011 %{CISCO_ACTION:action} %{CISCO_XLATE_TYPE:xlate_type} %{WORD:protocol} translation from %{DATA:src_interface}:%{IP:src_ip}(/%{INT:src_port})?(\(%{DATA:src_fwuser}\))? to %{DATA:src_xlated_interface}:%{IP:src_xlated_ip}/%{DATA:src_xlated_port} # ASA-3-313001, ASA-3-313004, ASA-3-313008 CISCOFW313001_313004_313008 %{CISCO_ACTION:action} %{WORD:protocol} type=%{INT:icmp_type}, code=%{INT:icmp_code} from %{IP:src_ip} on interface %{DATA:interface}( to %{IP:dst_ip})? # ASA-4-313005 CISCOFW313005 %{CISCO_REASON:reason} for %{WORD:protocol} error message: %{WORD:err_protocol} src %{DATA:err_src_interface}:%{IP:err_src_ip}(\(%{DATA:err_src_fwuser}\))? dst %{DATA:err_dst_interface}:%{IP:err_dst_ip}(\(%{DATA:err_dst_fwuser}\))? \(type %{INT:err_icmp_type}, code %{INT:err_icmp_code}\) on %{DATA:interface} interface\. Original IP payload: %{WORD:protocol} src %{IP:orig_src_ip}/%{INT:orig_src_port}(\(%{DATA:orig_src_fwuser}\))? dst %{IP:orig_dst_ip}/%{INT:orig_dst_port}(\(%{DATA:orig_dst_fwuser}\))? 
# ASA-5-321001 CISCOFW321001 Resource '%{WORD:resource_name}' limit of %{POSINT:resource_limit} reached for system # ASA-4-402117 CISCOFW402117 %{WORD:protocol}: Received a non-IPSec packet \(protocol= %{WORD:orig_protocol}\) from %{IP:src_ip} to %{IP:dst_ip} # ASA-4-402119 CISCOFW402119 %{WORD:protocol}: Received an %{WORD:orig_protocol} packet \(SPI= %{DATA:spi}, sequence number= %{DATA:seq_num}\) from %{IP:src_ip} \(user= %{DATA:user}\) to %{IP:dst_ip} that failed anti-replay checking # ASA-4-419001 CISCOFW419001 %{CISCO_ACTION:action} %{WORD:protocol} packet from %{DATA:src_interface}:%{IP:src_ip}/%{INT:src_port} to %{DATA:dst_interface}:%{IP:dst_ip}/%{INT:dst_port}, reason: %{GREEDYDATA:reason} # ASA-4-419002 CISCOFW419002 %{CISCO_REASON:reason} from %{DATA:src_interface}:%{IP:src_ip}/%{INT:src_port} to %{DATA:dst_interface}:%{IP:dst_ip}/%{INT:dst_port} with different initial sequence number # ASA-4-500004 CISCOFW500004 %{CISCO_REASON:reason} for protocol=%{WORD:protocol}, from %{IP:src_ip}/%{INT:src_port} to %{IP:dst_ip}/%{INT:dst_port} # ASA-6-602303, ASA-6-602304 CISCOFW602303_602304 %{WORD:protocol}: An %{CISCO_DIRECTION:direction} %{GREEDYDATA:tunnel_type} SA \(SPI= %{DATA:spi}\) between %{IP:src_ip} and %{IP:dst_ip} \(user= %{DATA:user}\) has been %{CISCO_ACTION:action} # ASA-7-710001, ASA-7-710002, ASA-7-710003, ASA-7-710005, ASA-7-710006 CISCOFW710001_710002_710003_710005_710006 %{WORD:protocol} (?:request|access) %{CISCO_ACTION:action} from %{IP:src_ip}/%{INT:src_port} to %{DATA:dst_interface}:%{IP:dst_ip}/%{INT:dst_port} # ASA-6-713172 CISCOFW713172 Group = %{GREEDYDATA:group}, IP = %{IP:src_ip}, Automatic NAT Detection Status:\s+Remote end\s*%{DATA:is_remote_natted}\s*behind a NAT device\s+This\s+end\s*%{DATA:is_local_natted}\s*behind a NAT device # ASA-4-733100 CISCOFW733100 \[\s*%{DATA:drop_type}\s*\] drop %{DATA:drop_rate_id} exceeded. 
Current burst rate is %{INT:drop_rate_current_burst} per second, max configured rate is %{INT:drop_rate_max_burst}; Current average rate is %{INT:drop_rate_current_avg} per second, max configured rate is %{INT:drop_rate_max_avg}; Cumulative total count is %{INT:drop_total_count} #== End Cisco ASA == # Shorewall firewall logs SHOREWALL (%{SYSLOGTIMESTAMP:timestamp}) (%{WORD:nf_host}) kernel:.*Shorewall:(%{WORD:nf_action1})?:(%{WORD:nf_action2})?.*IN=(%{USERNAME:nf_in_interface})?.*(OUT= *MAC=(%{COMMONMAC:nf_dst_mac}):(%{COMMONMAC:nf_src_mac})?|OUT=%{USERNAME:nf_out_interface}).*SRC=(%{IPV4:nf_src_ip}).*DST=(%{IPV4:nf_dst_ip}).*LEN=(%{WORD:nf_len}).*?TOS=(%{WORD:nf_tos}).*?PREC=(%{WORD:nf_prec}).*?TTL=(%{INT:nf_ttl}).*?ID=(%{INT:nf_id}).*?PROTO=(%{WORD:nf_protocol}).*?SPT=(%{INT:nf_src_port}?.*DPT=%{INT:nf_dst_port}?.*) #== End Shorewallgrokky-0.1.0/patterns/haproxy000066400000000000000000000062371417222633300163060ustar00rootroot00000000000000## These patterns were tested w/ haproxy-1.4.15 ## Documentation of the haproxy log formats can be found at the following links: ## http://code.google.com/p/haproxy-docs/wiki/HTTPLogFormat ## http://code.google.com/p/haproxy-docs/wiki/TCPLogFormat HAPROXYTIME %{HOUR:haproxy_hour}:%{MINUTE:haproxy_minute}(?::%{SECOND:haproxy_second}) HAPROXYDATE %{MONTHDAY:haproxy_monthday}/%{MONTH:haproxy_month}/%{YEAR:haproxy_year}:%{HAPROXYTIME:haproxy_time}.%{INT:haproxy_milliseconds} # Override these default patterns to parse out what is captured in your haproxy.cfg HAPROXYCAPTUREDREQUESTHEADERS %{DATA:captured_request_headers} HAPROXYCAPTUREDRESPONSEHEADERS %{DATA:captured_response_headers} # Example: # These haproxy config lines will add data to the logs that are captured # by the patterns below. Place them in your custom patterns directory to # override the defaults. 
# # capture request header Host len 40 # capture request header X-Forwarded-For len 50 # capture request header Accept-Language len 50 # capture request header Referer len 200 # capture request header User-Agent len 200 # # capture response header Content-Type len 30 # capture response header Content-Encoding len 10 # capture response header Cache-Control len 200 # capture response header Last-Modified len 200 # # HAPROXYCAPTUREDREQUESTHEADERS %{DATA:request_header_host}\|%{DATA:request_header_x_forwarded_for}\|%{DATA:request_header_accept_language}\|%{DATA:request_header_referer}\|%{DATA:request_header_user_agent} # HAPROXYCAPTUREDRESPONSEHEADERS %{DATA:response_header_content_type}\|%{DATA:response_header_content_encoding}\|%{DATA:response_header_cache_control}\|%{DATA:response_header_last_modified} # parse a haproxy 'httplog' line HAPROXYHTTPBASE %{IP:client_ip}:%{INT:client_port} \[%{HAPROXYDATE:accept_date}\] %{NOTSPACE:frontend_name} %{NOTSPACE:backend_name}/%{NOTSPACE:server_name} %{INT:time_request}/%{INT:time_queue}/%{INT:time_backend_connect}/%{INT:time_backend_response}/%{NOTSPACE:time_duration} %{INT:http_status_code} %{NOTSPACE:bytes_read} %{DATA:captured_request_cookie} %{DATA:captured_response_cookie} %{NOTSPACE:termination_state} %{INT:actconn}/%{INT:feconn}/%{INT:beconn}/%{INT:srvconn}/%{NOTSPACE:retries} %{INT:srv_queue}/%{INT:backend_queue} (\{%{HAPROXYCAPTUREDREQUESTHEADERS}\})?( )?(\{%{HAPROXYCAPTUREDRESPONSEHEADERS}\})?( )?"(|(%{WORD:http_verb} (%{URIPROTO:http_proto}://)?(?:%{USER:http_user}(?::[^@]*)?@)?(?:%{URIHOST:http_host})?(?:%{URIPATHPARAM:http_request})?( HTTP/%{NUMBER:http_version})?))?" 
HAPROXYHTTP (?:%{SYSLOGTIMESTAMP:syslog_timestamp}|%{TIMESTAMP_ISO8601:timestamp8601}) %{IPORHOST:syslog_server} %{SYSLOGPROG}: %{HAPROXYHTTPBASE} # parse a haproxy 'tcplog' line HAPROXYTCP (?:%{SYSLOGTIMESTAMP:syslog_timestamp}|%{TIMESTAMP_ISO8601:timestamp8601}) %{IPORHOST:syslog_server} %{SYSLOGPROG}: %{IP:client_ip}:%{INT:client_port} \[%{HAPROXYDATE:accept_date}\] %{NOTSPACE:frontend_name} %{NOTSPACE:backend_name}/%{NOTSPACE:server_name} %{INT:time_queue}/%{INT:time_backend_connect}/%{NOTSPACE:time_duration} %{NOTSPACE:bytes_read} %{NOTSPACE:termination_state} %{INT:actconn}/%{INT:feconn}/%{INT:beconn}/%{INT:srvconn}/%{NOTSPACE:retries} %{INT:srv_queue}/%{INT:backend_queue}grokky-0.1.0/patterns/java000066400000000000000000000025001417222633300155220ustar00rootroot00000000000000JAVACLASS (?:[a-zA-Z$_][a-zA-Z$_0-9]*\.)*[a-zA-Z$_][a-zA-Z$_0-9]* #Space is an allowed character to match special cases like 'Native Method' or 'Unknown Source' JAVAFILE (?:[A-Za-z0-9_. -]+) #Allow special method JAVAMETHOD (?:()|[a-zA-Z$_][a-zA-Z$_0-9]*) #Line number is optional in special cases 'Native method' or 'Unknown source' JAVASTACKTRACEPART %{SPACE}at %{JAVACLASS:class}\.%{JAVAMETHOD:method}\(%{JAVAFILE:file}(?::%{NUMBER:line})?\) # Java Logs JAVATHREAD (?:[A-Z]{2}-Processor[\d]+) ##JAVACLASS (?:[a-zA-Z0-9-]+\.)+[A-Za-z0-9$]+ ##JAVAFILE (?:[A-Za-z0-9_.-]+) ##JAVASTACKTRACEPART at %{JAVACLASS:class}\.%{WORD:method}\(%{JAVAFILE:file}:%{NUMBER:line}\) JAVALOGMESSAGE (.*) # MMM dd, yyyy HH:mm:ss eg: Jan 9, 2014 7:13:13 AM CATALINA_DATESTAMP %{MONTH} %{MONTHDAY}, 20%{YEAR} %{HOUR}:?%{MINUTE}(?::?%{SECOND}) (?:AM|PM) # yyyy-MM-dd HH:mm:ss,SSS ZZZ eg: 2014-01-09 17:32:25,527 -0800 TOMCAT_DATESTAMP 20%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:?%{MINUTE}(?::?%{SECOND}) %{ISO8601_TIMEZONE} CATALINALOG %{CATALINA_DATESTAMP:timestamp} %{JAVACLASS:class} %{JAVALOGMESSAGE:logmessage} # 2014-01-09 20:03:28,269 -0800 | ERROR | com.example.service.ExampleService - something compeletely unexpected 
happened... TOMCATLOG %{TOMCAT_DATESTAMP:timestamp} \| %{LOGLEVEL:level} \| %{JAVACLASS:class} - %{JAVALOGMESSAGE:logmessage}grokky-0.1.0/patterns/junos000066400000000000000000000020761417222633300157470ustar00rootroot00000000000000# JUNOS 11.4 RT_FLOW patterns RT_FLOW_EVENT (RT_FLOW_SESSION_CREATE|RT_FLOW_SESSION_CLOSE|RT_FLOW_SESSION_DENY) RT_FLOW1 %{RT_FLOW_EVENT:event}: %{GREEDYDATA:close-reason}: %{IP:src-ip}/%{INT:src-port}->%{IP:dst-ip}/%{INT:dst-port} %{DATA:service} %{IP:nat-src-ip}/%{INT:nat-src-port}->%{IP:nat-dst-ip}/%{INT:nat-dst-port} %{DATA:src-nat-rule-name} %{DATA:dst-nat-rule-name} %{INT:protocol-id} %{DATA:policy-name} %{DATA:from-zone} %{DATA:to-zone} %{INT:session-id} \d+\(%{DATA:sent}\) \d+\(%{DATA:received}\) %{INT:elapsed-time} .* RT_FLOW2 %{RT_FLOW_EVENT:event}: session created %{IP:src-ip}/%{INT:src-port}->%{IP:dst-ip}/%{INT:dst-port} %{DATA:service} %{IP:nat-src-ip}/%{INT:nat-src-port}->%{IP:nat-dst-ip}/%{INT:nat-dst-port} %{DATA:src-nat-rule-name} %{DATA:dst-nat-rule-name} %{INT:protocol-id} %{DATA:policy-name} %{DATA:from-zone} %{DATA:to-zone} %{INT:session-id} .* RT_FLOW3 %{RT_FLOW_EVENT:event}: session denied %{IP:src-ip}/%{INT:src-port}->%{IP:dst-ip}/%{INT:dst-port} %{DATA:service} %{INT:protocol-id}\(\d\) %{DATA:policy-name} %{DATA:from-zone} %{DATA:to-zone} .* grokky-0.1.0/patterns/linux-syslog000066400000000000000000000020101417222633300172520ustar00rootroot00000000000000SYSLOG5424PRINTASCII [!-~]+ SYSLOGBASE2 (?:%{SYSLOGTIMESTAMP:timestamp}|%{TIMESTAMP_ISO8601:timestamp8601}) (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource}+(?: %{SYSLOGPROG}:|) SYSLOGPAMSESSION %{SYSLOGBASE} %{GREEDYDATA:message}%{WORD:pam_module}\(%{DATA:pam_caller}\): session %{WORD:pam_session_state} for user %{USERNAME:username}(?: by %{GREEDYDATA:pam_by})? 
CRON_ACTION [A-Z ]+ CRONLOG %{SYSLOGBASE} \(%{USER:user}\) %{CRON_ACTION:action} \(%{DATA:message}\) SYSLOGLINE %{SYSLOGBASE2} %{GREEDYDATA:message} # IETF 5424 syslog(8) format (see http://www.rfc-editor.org/info/rfc5424) SYSLOG5424PRI <%{NONNEGINT:syslog5424_pri}> SYSLOG5424SD \[%{DATA}\]+ SYSLOG5424BASE %{SYSLOG5424PRI}%{NONNEGINT:syslog5424_ver} +(?:%{TIMESTAMP_ISO8601:syslog5424_ts}|-) +(?:%{HOSTNAME:syslog5424_host}|-) +(-|%{SYSLOG5424PRINTASCII:syslog5424_app}) +(-|%{SYSLOG5424PRINTASCII:syslog5424_proc}) +(-|%{SYSLOG5424PRINTASCII:syslog5424_msgid}) +(?:%{SYSLOG5424SD:syslog5424_sd}|-|) SYSLOG5424LINE %{SYSLOG5424BASE} +%{GREEDYDATA:syslog5424_msg}grokky-0.1.0/patterns/mcollective000066400000000000000000000002751417222633300171160ustar00rootroot00000000000000# Remember, these can be multi-line events. MCOLLECTIVE ., \[%{TIMESTAMP_ISO8601:timestamp} #%{POSINT:pid}\]%{SPACE}%{LOGLEVEL:event_level} MCOLLECTIVEAUDIT %{TIMESTAMP_ISO8601:timestamp}:grokky-0.1.0/patterns/mongodb000066400000000000000000000011351417222633300162310ustar00rootroot00000000000000MONGO_LOG %{SYSLOGTIMESTAMP:timestamp} \[%{WORD:component}\] %{GREEDYDATA:message} MONGO_QUERY \{ \{ .* \} ntoreturn: \} MONGO_WORDDASH \b[\w-]+\b MONGO_SLOWQUERY %{WORD} %{MONGO_WORDDASH:database}\.%{MONGO_WORDDASH:collection} %{WORD}: %{MONGO_QUERY:query} %{WORD}:%{NONNEGINT:ntoreturn} %{WORD}:%{NONNEGINT:ntoskip} %{WORD}:%{NONNEGINT:nscanned}.*nreturned:%{NONNEGINT:nreturned}..+ %{POSINT:duration}ms MONGO3_SEVERITY \w MONGO3_COMPONENT %{WORD}|- MONGO3_LOG %{TIMESTAMP_ISO8601:timestamp} %{MONGO3_SEVERITY:severity} %{MONGO3_COMPONENT:component}%{SPACE}(?:\[%{DATA:context}\])? %{GREEDYDATA:message}grokky-0.1.0/patterns/nagios000066400000000000000000000225741417222633300160760ustar00rootroot00000000000000################################################################################## ################################################################################## # Chop Nagios log files to smithereens! 
# # A set of GROK filters to process logfiles generated by Nagios. # While it does not, this set intends to cover all possible Nagios logs. # # Some more work needs to be done to cover all External Commands: # http://old.nagios.org/developerinfo/externalcommands/commandlist.php # # If you need some support on these rules please contact: # Jelle Smet http://smetj.net # ################################################################################# ################################################################################# NAGIOSTIME \[%{NUMBER:nagios_epoch}\] ############################################### ######## Begin nagios log types ############################################### NAGIOS_TYPE_CURRENT_SERVICE_STATE CURRENT SERVICE STATE NAGIOS_TYPE_CURRENT_HOST_STATE CURRENT HOST STATE NAGIOS_TYPE_SERVICE_NOTIFICATION SERVICE NOTIFICATION NAGIOS_TYPE_HOST_NOTIFICATION HOST NOTIFICATION NAGIOS_TYPE_SERVICE_ALERT SERVICE ALERT NAGIOS_TYPE_HOST_ALERT HOST ALERT NAGIOS_TYPE_SERVICE_FLAPPING_ALERT SERVICE FLAPPING ALERT NAGIOS_TYPE_HOST_FLAPPING_ALERT HOST FLAPPING ALERT NAGIOS_TYPE_SERVICE_DOWNTIME_ALERT SERVICE DOWNTIME ALERT NAGIOS_TYPE_HOST_DOWNTIME_ALERT HOST DOWNTIME ALERT NAGIOS_TYPE_PASSIVE_SERVICE_CHECK PASSIVE SERVICE CHECK NAGIOS_TYPE_PASSIVE_HOST_CHECK PASSIVE HOST CHECK NAGIOS_TYPE_SERVICE_EVENT_HANDLER SERVICE EVENT HANDLER NAGIOS_TYPE_HOST_EVENT_HANDLER HOST EVENT HANDLER NAGIOS_TYPE_EXTERNAL_COMMAND EXTERNAL COMMAND NAGIOS_TYPE_TIMEPERIOD_TRANSITION TIMEPERIOD TRANSITION ############################################### ######## End nagios log types ############################################### ############################################### ######## Begin external check types ############################################### NAGIOS_EC_DISABLE_SVC_CHECK DISABLE_SVC_CHECK NAGIOS_EC_ENABLE_SVC_CHECK ENABLE_SVC_CHECK NAGIOS_EC_DISABLE_HOST_CHECK DISABLE_HOST_CHECK NAGIOS_EC_ENABLE_HOST_CHECK ENABLE_HOST_CHECK NAGIOS_EC_PROCESS_SERVICE_CHECK_RESULT 
PROCESS_SERVICE_CHECK_RESULT NAGIOS_EC_PROCESS_HOST_CHECK_RESULT PROCESS_HOST_CHECK_RESULT NAGIOS_EC_SCHEDULE_SERVICE_DOWNTIME SCHEDULE_SERVICE_DOWNTIME NAGIOS_EC_SCHEDULE_HOST_DOWNTIME SCHEDULE_HOST_DOWNTIME NAGIOS_EC_DISABLE_HOST_SVC_NOTIFICATIONS DISABLE_HOST_SVC_NOTIFICATIONS NAGIOS_EC_ENABLE_HOST_SVC_NOTIFICATIONS ENABLE_HOST_SVC_NOTIFICATIONS NAGIOS_EC_DISABLE_HOST_NOTIFICATIONS DISABLE_HOST_NOTIFICATIONS NAGIOS_EC_ENABLE_HOST_NOTIFICATIONS ENABLE_HOST_NOTIFICATIONS NAGIOS_EC_DISABLE_SVC_NOTIFICATIONS DISABLE_SVC_NOTIFICATIONS NAGIOS_EC_ENABLE_SVC_NOTIFICATIONS ENABLE_SVC_NOTIFICATIONS ############################################### ######## End external check types ############################################### NAGIOS_WARNING Warning:%{SPACE}%{GREEDYDATA:nagios_message} NAGIOS_CURRENT_SERVICE_STATE %{NAGIOS_TYPE_CURRENT_SERVICE_STATE:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{DATA:nagios_statetype};%{DATA:nagios_statecode};%{GREEDYDATA:nagios_message} NAGIOS_CURRENT_HOST_STATE %{NAGIOS_TYPE_CURRENT_HOST_STATE:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{DATA:nagios_statetype};%{DATA:nagios_statecode};%{GREEDYDATA:nagios_message} NAGIOS_SERVICE_NOTIFICATION %{NAGIOS_TYPE_SERVICE_NOTIFICATION:nagios_type}: %{DATA:nagios_notifyname};%{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{DATA:nagios_contact};%{GREEDYDATA:nagios_message} NAGIOS_HOST_NOTIFICATION %{NAGIOS_TYPE_HOST_NOTIFICATION:nagios_type}: %{DATA:nagios_notifyname};%{DATA:nagios_hostname};%{DATA:nagios_state};%{DATA:nagios_contact};%{GREEDYDATA:nagios_message} NAGIOS_SERVICE_ALERT %{NAGIOS_TYPE_SERVICE_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{DATA:nagios_statelevel};%{NUMBER:nagios_attempt};%{GREEDYDATA:nagios_message} NAGIOS_HOST_ALERT %{NAGIOS_TYPE_HOST_ALERT:nagios_type}: 
%{DATA:nagios_hostname};%{DATA:nagios_state};%{DATA:nagios_statelevel};%{NUMBER:nagios_attempt};%{GREEDYDATA:nagios_message} NAGIOS_SERVICE_FLAPPING_ALERT %{NAGIOS_TYPE_SERVICE_FLAPPING_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{GREEDYDATA:nagios_message} NAGIOS_HOST_FLAPPING_ALERT %{NAGIOS_TYPE_HOST_FLAPPING_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{GREEDYDATA:nagios_message} NAGIOS_SERVICE_DOWNTIME_ALERT %{NAGIOS_TYPE_SERVICE_DOWNTIME_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{GREEDYDATA:nagios_comment} NAGIOS_HOST_DOWNTIME_ALERT %{NAGIOS_TYPE_HOST_DOWNTIME_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{GREEDYDATA:nagios_comment} NAGIOS_PASSIVE_SERVICE_CHECK %{NAGIOS_TYPE_PASSIVE_SERVICE_CHECK:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{GREEDYDATA:nagios_comment} NAGIOS_PASSIVE_HOST_CHECK %{NAGIOS_TYPE_PASSIVE_HOST_CHECK:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{GREEDYDATA:nagios_comment} NAGIOS_SERVICE_EVENT_HANDLER %{NAGIOS_TYPE_SERVICE_EVENT_HANDLER:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{DATA:nagios_statelevel};%{DATA:nagios_event_handler_name} NAGIOS_HOST_EVENT_HANDLER %{NAGIOS_TYPE_HOST_EVENT_HANDLER:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{DATA:nagios_statelevel};%{DATA:nagios_event_handler_name} NAGIOS_TIMEPERIOD_TRANSITION %{NAGIOS_TYPE_TIMEPERIOD_TRANSITION:nagios_type}: %{DATA:nagios_service};%{DATA:nagios_unknown1};%{DATA:nagios_unknown2} #################### #### External checks #################### #Disable host & service check NAGIOS_EC_LINE_DISABLE_SVC_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_DISABLE_SVC_CHECK:nagios_command};%{DATA:nagios_hostname};%{DATA:nagios_service} NAGIOS_EC_LINE_DISABLE_HOST_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: 
%{NAGIOS_EC_DISABLE_HOST_CHECK:nagios_command};%{DATA:nagios_hostname} #Enable host & service check NAGIOS_EC_LINE_ENABLE_SVC_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_ENABLE_SVC_CHECK:nagios_command};%{DATA:nagios_hostname};%{DATA:nagios_service} NAGIOS_EC_LINE_ENABLE_HOST_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_ENABLE_HOST_CHECK:nagios_command};%{DATA:nagios_hostname} #Process host & service check NAGIOS_EC_LINE_PROCESS_SERVICE_CHECK_RESULT %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_PROCESS_SERVICE_CHECK_RESULT:nagios_command};%{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{GREEDYDATA:nagios_check_result} NAGIOS_EC_LINE_PROCESS_HOST_CHECK_RESULT %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_PROCESS_HOST_CHECK_RESULT:nagios_command};%{DATA:nagios_hostname};%{DATA:nagios_state};%{GREEDYDATA:nagios_check_result} #Disable host & service notifications NAGIOS_EC_LINE_DISABLE_HOST_SVC_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_DISABLE_HOST_SVC_NOTIFICATIONS:nagios_command};%{GREEDYDATA:nagios_hostname} NAGIOS_EC_LINE_DISABLE_HOST_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_DISABLE_HOST_NOTIFICATIONS:nagios_command};%{GREEDYDATA:nagios_hostname} NAGIOS_EC_LINE_DISABLE_SVC_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_DISABLE_SVC_NOTIFICATIONS:nagios_command};%{DATA:nagios_hostname};%{GREEDYDATA:nagios_service} #Enable host & service notifications NAGIOS_EC_LINE_ENABLE_HOST_SVC_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_ENABLE_HOST_SVC_NOTIFICATIONS:nagios_command};%{GREEDYDATA:nagios_hostname} NAGIOS_EC_LINE_ENABLE_HOST_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_ENABLE_HOST_NOTIFICATIONS:nagios_command};%{GREEDYDATA:nagios_hostname} NAGIOS_EC_LINE_ENABLE_SVC_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: 
%{NAGIOS_EC_ENABLE_SVC_NOTIFICATIONS:nagios_command};%{DATA:nagios_hostname};%{GREEDYDATA:nagios_service} #Schedule host & service downtime NAGIOS_EC_LINE_SCHEDULE_HOST_DOWNTIME %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_SCHEDULE_HOST_DOWNTIME:nagios_command};%{DATA:nagios_hostname};%{NUMBER:nagios_start_time};%{NUMBER:nagios_end_time};%{NUMBER:nagios_fixed};%{NUMBER:nagios_trigger_id};%{NUMBER:nagios_duration};%{DATA:author};%{DATA:comment} #End matching line NAGIOSLOGLINE %{NAGIOSTIME} (?:%{NAGIOS_WARNING}|%{NAGIOS_CURRENT_SERVICE_STATE}|%{NAGIOS_CURRENT_HOST_STATE}|%{NAGIOS_SERVICE_NOTIFICATION}|%{NAGIOS_HOST_NOTIFICATION}|%{NAGIOS_SERVICE_ALERT}|%{NAGIOS_HOST_ALERT}|%{NAGIOS_SERVICE_FLAPPING_ALERT}|%{NAGIOS_HOST_FLAPPING_ALERT}|%{NAGIOS_SERVICE_DOWNTIME_ALERT}|%{NAGIOS_HOST_DOWNTIME_ALERT}|%{NAGIOS_PASSIVE_SERVICE_CHECK}|%{NAGIOS_PASSIVE_HOST_CHECK}|%{NAGIOS_SERVICE_EVENT_HANDLER}|%{NAGIOS_HOST_EVENT_HANDLER}|%{NAGIOS_TIMEPERIOD_TRANSITION}|%{NAGIOS_EC_LINE_DISABLE_SVC_CHECK}|%{NAGIOS_EC_LINE_ENABLE_SVC_CHECK}|%{NAGIOS_EC_LINE_DISABLE_HOST_CHECK}|%{NAGIOS_EC_LINE_ENABLE_HOST_CHECK}|%{NAGIOS_EC_LINE_PROCESS_HOST_CHECK_RESULT}|%{NAGIOS_EC_LINE_PROCESS_SERVICE_CHECK_RESULT}|%{NAGIOS_EC_LINE_SCHEDULE_HOST_DOWNTIME}|%{NAGIOS_EC_LINE_DISABLE_HOST_SVC_NOTIFICATIONS}|%{NAGIOS_EC_LINE_ENABLE_HOST_SVC_NOTIFICATIONS}|%{NAGIOS_EC_LINE_DISABLE_HOST_NOTIFICATIONS}|%{NAGIOS_EC_LINE_ENABLE_HOST_NOTIFICATIONS}|%{NAGIOS_EC_LINE_DISABLE_SVC_NOTIFICATIONS}|%{NAGIOS_EC_LINE_ENABLE_SVC_NOTIFICATIONS})grokky-0.1.0/patterns/nginx000066400000000000000000000020471417222633300157320ustar00rootroot00000000000000NGUSERNAME [a-zA-Z\.\@\-\+_%]+ NGUSER %{NGUSERNAME} # '$remote_addr - $remote_user [$time_local] ' # '"$request" $status $body_bytes_sent ' # '"$http_referer" "$http_user_agent"'; # 127.0.0.1 - - [28/Jan/2016:14:19:36 +0300] "GET /zero.html HTTP/1.1" 200 398 "-" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.111 Safari/537.36" 
NOTDQUOTE [^"]* DAY2 \d{2} #NGINXERRTIME %{YEAR:year}/%{MONTHNUM2:month}/%{DAY2:day} %{HOUR:hour}:%{MINUTE:minute}:%{SECOND:second} NGINXERRTIME %{YEAR}/%{MONTHNUM2}/%{DAY2} %{HOUR}:%{MINUTE}:%{SECOND} NGINXACCESS %{IPORHOST:remote_addr} - %{NGUSER:remote_user} \[%{HTTPDATE:time_local}\] "%{WORD:method} %{URIPATHPARAM:request} HTTP/%{NUMBER:http_version}" %{NUMBER:status} %{NUMBER:body_bytes_sent} "%{NOTDQUOTE:http_referer}" "%{NOTDQUOTE:http_user_agent}" # YYYY/MM/DD HH:MM:SS [LEVEL] PID#TID: *CID MESSAGE NGINXERROR %{NGINXERRTIME:time} \[%{LOGLEVEL:loglevel}\] %{NONNEGINT:pid}#%{NONNEGINT:tid}: (\*%{NONNEGINT:cid} )?%{GREEDYDATA:message} grokky-0.1.0/patterns/postgresql000066400000000000000000000002141417222633300170040ustar00rootroot00000000000000# Default postgresql pg_log format pattern POSTGRESQL %{DATESTAMP:timestamp} %{TZ} %{DATA:user_id} %{GREEDYDATA:connection_id} %{POSINT:pid}grokky-0.1.0/patterns/rails000066400000000000000000000017051417222633300157210ustar00rootroot00000000000000RUUID [0-9a-fA-F]{32} # rails controller with action RAILS_CONSTROLLER [^#]+ RAIL_ACTION \w+ RCONTROLLER %{RAILS_CONSTROLLER:controller}#%{RAIL_ACTION:action} # this will often be the only line: RAILS_TIMESTAMP %{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:%{MINUTE}:%{SECOND} %{ISO8601_TIMEZONE} RAILS3HEAD (?m)Started %{WORD:verb} "%{URIPATHPARAM:request}" for %{IPORHOST:clientip} at %{RAILS_TIMESTAMP:timestamp} # for some strange reason, params are stripped of {} - not sure that's a good idea. RPROCESSING \W*Processing by %{RCONTROLLER} as %{NOTSPACE:format}(?:\W*Parameters: {%{DATA:params}}\W*)? RAILS3PROFILE (?:\(Views: %{NUMBER:viewms}ms \| ActiveRecord: %{NUMBER:activerecordms}ms|\(ActiveRecord: %{NUMBER:activerecordms}ms)? 
RAILS3FOOT Completed %{NUMBER:response}%{DATA} in %{NUMBER:totalms}ms %{RAILS3PROFILE}%{GREEDYDATA} RAILS_CONTEXT (?:%{DATA}\n)* # putting it all together RAILS3 %{RAILS3HEAD}(?:%{RPROCESSING})?%{RAILS_CONTEXT:context}(?:%{RAILS3FOOT})?grokky-0.1.0/patterns/redis000066400000000000000000000016521417222633300157160ustar00rootroot00000000000000 # # Format 1: # # [43569] 27 Aug 12:38:58.471 * RDB: 12 MB of memory used by copy-on-write # # # Format 2: # # 31493:M 17 Sep 09:02:54.807 # Server started, Redis version 3.0.2 # 31493:M 17 Sep 09:02:54.807 # WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm$ # 31493:M 17 Sep 09:02:54.807 # WARNING: The TCP backlog setting of 511 cannot be enforced because /proc/sys/net/core/somaxconn is set to the lower value of 128. # 31493:M 17 Sep 09:02:54.807 * DB loaded from disk: 0.000 seconds # 31493:M 17 Sep 09:02:54.807 * The server is now ready to accept connections on port 6379 # REDISTIMESTAMP %{MONTHDAY} %{MONTH} %{TIME} REDISLOG \[%{POSINT:pid}\] %{REDISTIMESTAMP:time} \*\s REDISLOG1 %{REDISLOG} REDISLOG2 %{POSINT:pid}:M %{REDISTIMESTAMP:time} [*#] %{GREEDYDATA:message}grokky-0.1.0/patterns/ruby000066400000000000000000000002671417222633300155720ustar00rootroot00000000000000RUBY_LOGLEVEL DEBUG|FATAL|ERROR|WARN|INFO RUBY_LOGGER [DFEWI], \[%{TIMESTAMP_ISO8601:timestamp} #%{POSINT:pid}\] *%{RUBY_LOGLEVEL:loglevel} -- +%{DATA:progname}: %{GREEDYDATA:message}grokky-0.1.0/patterns_fail.txt000066400000000000000000000000271417222633300164140ustar00rootroot00000000000000ONE \d TWO %{THREE:two}grokky-0.1.0/patterns_pass.txt000066400000000000000000000001321417222633300164440ustar00rootroot00000000000000# # for testing # ONE \d TWO %{ONE:two} THREE %{ONE:one}-%{TWO}-%{ONE:three} # # enough 
#grokky-0.1.0/repository_test.go000066400000000000000000000035371417222633300166360ustar00rootroot00000000000000// // Copyright (c) 2016-2017 Konstanin Ivanov . // All rights reserved. This program is free software. It comes without // any warranty, to the extent permitted by applicable law. You can // redistribute it and/or modify it under the terms of the Do What // The Fuck You Want To Public License, Version 2, as published by // Sam Hocevar. See LICENSE file for more details or see below. // // // DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE // Version 2, December 2004 // // Copyright (C) 2004 Sam Hocevar // // Everyone is permitted to copy and distribute verbatim or modified // copies of this license document, and changing it is allowed as long // as the name is changed. // // DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE // TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION // // 0. You just DO WHAT THE FUCK YOU WANT TO. // package grokky import ( "io/ioutil" fp "path/filepath" "testing" ) const repository = "patterns" func repoPath(pth string) string { return fp.Join(repository, pth) } func Test_repository(t *testing.T) { fis, err := ioutil.ReadDir(repository) if err != nil { t.Error(err) t.FailNow() } h := NewBase() for _, fi := range fis { t.Log("REPO:", fi.Name()) err := h.AddFromFile(repoPath(fi.Name())) if err != nil { t.Error(err) } } } func Test_ngaccess(t *testing.T) { h := NewBase() err := h.AddFromFile(repoPath("nginx")) if err != nil { t.Error(err) } line := `127.0.0.1 - - [28/Jan/2016:14:19:36 +0300] "GET /zero.html HTTP/1.1" 200 398 "-" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.111 Safari/537.36"` p, err := h.Compile("%{NGINXACCESS}") if err != nil { t.Error(err) } mss := p.Parse(line) if len(mss) == 0 { t.Error("nginx access not matched") } t.Log(mss) }