I have a problem on logstash
where my workers jump to 100% CPU while activating 1 specific filter. Im currently using this input + filter (on which grok parses ok):
input {
redis {
key => "logs_services_dyna_customer.lnxdev01"
data_type => ['list']
db => 8
host => "kibana.test.local"
type => "D_Customer"
tags => ["d_local"]
add_field => {
server => "lnxdev01"
}
codec => "plain"
}
}
filter {
if [type] in ["D_Customer"] {
mutate {
gsub => ["message", "\r", ""]
}
grok {
patterns_dir => "/etc/logstash/patterns/"
match => ["message", "^Request Headers:\n(%{WORD:request_method} %{DATA:service_url} %{DATA}\n)?(Host: %{HOSTNAME:request_host}\n)?(Connection: %{WORD:request_connection}\n)?(User-Agent: %{DATA:user_agent}\n)?(Content-Type: %{DATA:content_type}\n)?(SOAPAction: \"%{DATA:request_action}\"\n)?(Content-Length: %{NUMBER:request_content_length}\n)?(Cookie: %{DATA}\n)?(:?X-START-DATE: %{APACHE_DATETIME:request_start_date})?\n+Request Body:\n%{GREEDIERDATA:request_body}\n+Response Headers:\n(HTTP/%{DATA}(?<response_code>\d{3}) %{DATA:response_message}\n)?(Cache-Control: %{DATA}\n)?(Content-Length: %{NUMBER:response_content_length}\n)?(Content-Type: %{DATA:response_content_type}\n)?(Server: %{DATA:response_server}\n)?(X-AspNet-Version: %{DATA}\n)?(X-Powered-By: %{DATA}\n)?(Date: %{APACHE_DATETIME:response_date}\n)?(Connection: %{WORD:response_connection}\n)?\n+Response Body:\n%{GREEDIERDATA:response_body}$"]
add_field => ["category", "Service"]
}
date {
match => ["request_start_date", "EEE, dd MMMM YYYY HH:mm:ss z"]
}
date {
match => ["request_start_date", "EEE, dd MMMM YYYY HH:mm:ss z"]
target => "request_start_date"
}
date {
match => ["response_date", "EEE, dd MMMM YYYY HH:mm:ss z"]
target => "response_date"
}
}
}
And below the log itself:
Request Headers:
POST /Middleware.OIL.I01_SearchCustomer.Wcf/WcfService_Middleware_OIL_I01_SearchCustomer_App.svc HTTP/1.1
Host: w12vfdbiz101-t.dy.nl
Connection: close
User-Agent: PHP-SOAP/7.0.9-1+deb.sury.org~trusty+1
Content-Type: text/xml; charset=utf-8
SOAPAction: "SearchCustomer"
Content-Length: 1628
X-START-DATE: Mon, 07 Nov 2016 13:27:03 UTC
Request Body:
<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" xmlns:ns1="http://www.dy.eu/Customer/V1-0"><SOAP-ENV:Body><v1:SearchCustomerRequest xmlns:v1="http://www.dy.eu/Customer/V1-0" xmlns:v2="http://dy/CoreComponents/Schemas/dy/V2-0" xmlns:urn="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2" xmlns:urn1="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2" xmlns:v21="http://dy/CoreComponents/Schemas/dy/V2-0">
<v2:HeaderInfo>
<v2:TransactionId>58208127a1179</v2:TransactionId>
<v2:UserId>1</v2:UserId>
<v2:ApplicationId>Omnius</v2:ApplicationId>
<v2:PartyName>OGW</v2:PartyName>
</v2:HeaderInfo>
<v1:SearchedAccount>
<urn:PartyName>
<urn1:Name/>
</urn:PartyName>
</v1:SearchedAccount>
<v1:SearchedContact>
<v21:Role>
<v21:Person>
<urn1:ID/>
<urn1:FirstName>Friedrich</urn1:FirstName>
<urn1:FamilyName>Muller</urn1:FamilyName>
<urn:Contact>
<urn1:Telephone/>
<urn1:ElectronicMail/>
</urn:Contact>
<urn:ResidenceAddress>
<urn1:StreetName/>
<urn1:BuildingNumber/>
<urn1:CityName/>
<urn1:PostalZone/>
</urn:ResidenceAddress>
</v21:Person>
</v21:Role>
</v1:SearchedContact>
<v1:SearchedEquipment>
<urn:ManufacturersItemIdentification>
<urn1:ID/>
</urn:ManufacturersItemIdentification>
</v1:SearchedEquipment>
</v1:SearchCustomerRequest></SOAP-ENV:Body></SOAP-ENV:Envelope>
Response Headers:
HTTP/1.1 200 OK
Cache-Control: private
Content-Length: 2379
Content-Type: text/xml; charset=utf-8
Server: Microsoft-IIS/8.5
X-AspNet-Version: 4.0.30319
X-Powered-By: ASP.NET
Date: Mon, 07 Nov 2016 13:27:10 GMT
Connection: close
Response Body:
<s:Envelope xmlns:s="http://schemas.xmlsoap.org/soap/envelope/"><s:Body><ns0:SearchCustomerResponse xmlns:ns0="http://www.dy.eu/Customer/V1-0" xmlns:ns1="http://uri.etsi.org/01903/v1.3.2#" xmlns:ns2="urn:oasis:names:specification:ubl:schema:xsd:CommonExtensionComponents-2" xmlns:ns3="urn:oasis:names:specification:ubl:schema:xsd:SignatureBasicComponents-2" xmlns:ns4="http://www.w3.org/2000/09/xmldsig#" xmlns:ns5="urn:oasis:names:specification:ubl:schema:xsd:QualifiedDataTypes-2" xmlns:ns6="urn:oasis:names:specification:ubl:schema:xsd:UnqualifiedDataTypes-2" xmlns:ns7="urn:un:unece:uncefact:data:specification:CoreComponentTypeSchemaModule:2" xmlns:ns8="urn:oasis:names:specification:ubl:schema:xsd:SignatureAggregateComponents-2" xmlns:ns9="http://uri.etsi.org/01903/v1.4.1#" xmlns:ns10="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2" xmlns:ns11="http://Dy/CoreComponents/Schemas/Dy/V2-0" xmlns:ns12="urn:oasis:names:specification:ubl:schema:xsd:CommonSignatureComponents-2" xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><cbc:ReferenceID>8</cbc:ReferenceID><ns10:Status xsi:type="ns10:StatusType"><cbc:StatusReasonCode/><cbc:StatusReason/></ns10:Status><ns11:Success>true</ns11:Success><ns0:ReturnedNumberOfRecords>1</ns0:ReturnedNumberOfRecords><ns11:Account><ns10:PartyName><cbc:Name>CompanyName_0</cbc:Name></ns10:PartyName><ns10:PostalAddress xsi:type="ns10:AddressType"><cbc:StreetName>Street0</cbc:StreetName><cbc:BuildingNumber>1</cbc:BuildingNumber><cbc:CityName>City0</cbc:CityName><cbc:PostalZone>12345</cbc:PostalZone></ns10:PostalAddress><ns10:Contact xsi:type="ns10:ContactType"><cbc:ID>KS_12366</cbc:ID><cbc:ElectronicMail/></ns10:Contact><ns11:AccountCategory>KD</ns11:AccountCategory><ns11:AccountClass>P</ns11:AccountClass><ns11:ParentAccountNumber><cbc:ID>555</cbc:ID></ns11:ParentAccountNumber><ns11:BankAccountNumber>1234</ns11:BankAccountNumber><ns11:Role><ns11:Person><cbc:FirstName>Friedrich</cbc:FirstName><cbc:FamilyName>Müller</cbc:FamilyName><cbc:BirthDate>19700101</cbc:BirthDate><ns11:ContactPhoneNumber><ns11:PhoneNumber>0612387777</ns11:PhoneNumber><ns11:Type>Mobile</ns11:Type></ns11:ContactPhoneNumber></ns11:Person></ns11:Role></ns11:Account></ns0:SearchCustomerResponse></s:Body></s:Envelope>
So the question is, how come that logstash is spiking to 100% CPU and is this because of below filter? Should it be different or can it be more efficient?
Thanks Pim
The issue was that somebody used GREEDIERDATA. This meant it will match on .* \n \r. If grok needs to parse it will get in a massive loop since it will not know when to stop at all and needs to iterate the complete entry all the time