Logstash multiline log for a thesql query

logstash

I'm looking to push logs from mysql-proxy lua's script into lostash. An example log might be

[2015-03-09 11:13:47] USER:username IP:10.102.51.134:41420 DB:dbName Query: -- One Pager Trends 
-- params:

SELECT 
  date,
  SUM(t.rev) revenue,
  SUM(t.rev - t.cost) profit 
FROM
  am.s_d t
  INNER JOIN am.event e 
    ON t.`event_id` = e.`event_id`
WHERE 1=1 AND DATE BETWEEN '2014-12-08' AND '2015-03-08'
  AND t.source_id = 25
GROUP BY date
[2015-03-09 11:17:28] USER:mzupan IP:10.102.22.216:49843 DB: Query: show databases

A new log entry will always start with [

So i'm shipping the logs using logstash-forwarder and processing like

filter {

  if [type] == "mysql-proxy" {
    grok {
      match => { "message" => "\[%{TIMESTAMP_ISO8601}\] USER:%{WORD:user} IP:%{IP:ip}:%{INT} DB:%{DATA:db} Query: (?<query>(.|\r|\n)*)" }
    }
    multiline {
      pattern => "^\["
      what => "previous"
      negate=> true
    }
    date {
      match => [ "timestamp", "yyyy-MM-dd HH:mm:ss" ]
    }
  }
}

My issue is in kibana I see the query like the following json

{
  "_index": "logstash-2015.03.09",
  "_type": "mysql-proxy",
  "_id": "AUv_vj3u0BuDzneUoKKc",
  "_score": null,
  "_source": {
    "message": "[2015-03-09 11:13:47] USER:username IP:10.102.51.134:41420 DB:dbName Query: -- One Pager Trends \n-- params:\n\nSELECT \n  date,\n  SUM(t.rev) revenue,\n  SUM(t.rev - t.cost) profit \nFROM\n  am.s_d t\n  INNER JOIN am.event e \n    ON t.`event_id` = e.`event_id`\nWHERE 1=1 AND DATE BETWEEN '2014-12-08' AND '2015-03-08'\n  AND t.source_id = 25\nGROUP BY date",
    "@version": "1",
    "@timestamp": "2015-03-09T18:13:52.287Z",
    "type": "mysql-proxy",
    "file": "/var/log/mysql-queries.log",
    "host": "an01.domain.com",
    "offset": [
      "11855847",
      "11855943",
      "11855954",
      "11855955",
      "11855963",
      "11855971",
      "11855993",
      "11856023",
      "11856028",
      "11856039",
      "11856064",
      "11856099",
      "11856156",
      "11856179",
      "11856193",
      "11856194"
    ],
    "user": "username",
    "ip": "10.102.51.134",
    "db": "dbname",
    "query": "-- One Pager Trends ",
    "tags": [
      "_grokparsefailure",
      "multiline"
    ]
  },
  "fields": {
    "@timestamp": [
      1425924832287
    ]
  },
  "sort": [
    1425924832287
  ]
}

I'm only seeing the first part even though logstash seems to be setting the message correctly.

Best Answer

Multiline in your filter should be placed before the match part. Try configuring it like this:

filter {
  if [type] == "mysql-proxy" {
    multiline {
      pattern => "^\["
      what    => "previous"
      negate  => true
    }
    grok {
      match => { "message" => "\[%{TIMESTAMP_ISO8601}\] USER:%{WORD:user} IP:%{IP:ip}:%{INT} DB:%{DATA:db} Query: (?(.|\r|\n)*)" }
    }
    date {
      match => [ "timestamp", "yyyy-MM-dd HH:mm:ss" ]
    }
  }

This works for me with logstash v1.4.2.

Related Topic