I am trying to make this Logstash .conf file more scalable. To get one Elasticsearch index per input file, my idea is to split the file path, take the last segment (the CSV file name), and use it as both the `type` and the index name in Elasticsearch.
import pandas as pd
import numpy as np
# Two file inputs, one per CSV file.
# start_position => "beginning" reads each file from its first line, and
# sincedb_path => "/dev/null" discards the saved read position, so the
# files are re-read in full on every Logstash start.
input {
  file {
    path           => "/home/aitor2/RETO8/BIGDATA/df_suministro_activa.csv"
    start_position => "beginning"
    sincedb_path   => "/dev/null"
  }

  file {
    path           => "/home/aitor2/RETO8/BIGDATA/df_activo_consumo.csv"
    start_position => "beginning"
    sincedb_path   => "/dev/null"
  }
}
# The Logstash pipeline language has no variable assignment, so a statement
# such as `path2 = path.split('/')[-1]` cannot be used between sections.
# The file name is derived from the event's [path] field inside the filter.
filter {
  # Step 1: copy the full source path into [type] ...
  mutate { replace => { "type" => "%{path}" } }
  # Step 2: ... then strip everything up to and including the last "/",
  # leaving only the file name (e.g. "df_suministro_activa.csv").
  # NOTE(review): on Logstash versions with ECS compatibility enabled the
  # file input may store the path in [log][file][path] instead of [path] --
  # confirm against the rubydebug output and adjust the field name if needed.
  mutate { gsub => [ "type", "^.*/", "" ] }

  if [path] == "/home/aitor2/RETO8/BIGDATA/df_suministro_activa.csv" {
    # [type] already holds the CSV file name at this point.
    csv {
      separator => ","
      skip_header => "true"
      autodetect_column_names => true
    }
    # Convert every string field whose name starts with 'Linea' to a float.
    ruby {
      code => "event.to_hash.each { |k, v|
        if k.start_with?('Linea') and v.is_a?(String)
          event.set(k, v.to_f)
        end
      }
      "
    }
  } else if [path] == "/home/aitor2/RETO8/BIGDATA/df_activo_consumo.csv" {
    # This file keeps its explicit type (and therefore index prefix).
    mutate { replace => { type => "apaches2" } }
    csv {
      separator => ","
      skip_header => "true"
      autodetect_column_names => true
    }
    # Convert every string field whose name starts with 'Smart' to a float.
    ruby {
      code => "event.to_hash.each { |k, v|
        if k.start_with?('Smart') and v.is_a?(String)
          event.set(k, v.to_f)
        end
      }
      "
    }
  }
}
# Ship every event to Elasticsearch and echo it to the console.
output {
  # The index name is built per event from its [type] field: "<type>_indexer".
  elasticsearch {
    hosts => "http://localhost:9200"
    index => "%{type}_indexer"
  }

  # Print each event to stdout for debugging.
  stdout {
    codec => rubydebug
  }
}
I tried to do it with `path2 = path.split('/')[-1]`, but I'm not sure that is possible in a Logstash configuration file.
In the `filter` part, set the value of `type` to the file name (`df_suministro_activa.csv` or `df_activo_consumo.csv`). I use `grok` for this; `mutate` is another possibility (see the Logstash documentation).
You can then use `type` in the output, in the if/else conditions, change its value, etc.
# One file input per CSV; the remaining options are elided ("...").
input {
  file {
    path => "/home/aitor2/RETO8/BIGDATA/df_suministro_activa.csv"
    ...
  }

  file {
    path => "/home/aitor2/RETO8/BIGDATA/df_activo_consumo.csv"
    ...
  }
}
filter {
  # Capture the last path segment (the file name) into [type].
  # `match` needs `=>` before its hash, and the pattern is anchored with `$`
  # so that only the final "/<name>" segment of the path is captured.
  grok { match => { "path" => "/(?<type>[^/]+)$" } }

  if [type] == "df_suministro_activa.csv" {
    ...
  } else if [type] == "df_activo_consumo.csv" {
    # Override the derived file name with a custom type for this file.
    mutate { replace => { type => "apaches2" } }
    ...
  }
}
# The index name is built per event from its [type] field: "<type>_indexer".
output {
  elasticsearch {
    hosts => "http://localhost:9200"
    index => "%{type}_indexer"
  }
}
I am unsure about the `path` field; you may want to try `[log][file][path]` instead of `path` in the `filter` block.