Search code examples
elasticsearchlogstashlogstash-configurationlogstash-jdbc

Logstash sql_last_value date format seems to be wrong


Hi all, I am using Logstash to index documents from an MSSQL server into Elasticsearch. I am using the config below to do incremental indexing, tracking a column called modified_date, but I am having a problem with the date format.

below is my config

# JDBC input: polls MSSQL every minute and emits one event per joined policy/claim row.
input {
jdbc {
# Microsoft SQL Server JDBC driver jar and class.
jdbc_driver_library => "D:/Users/xxxxx/Desktop/driver/mssql-jdbc-7.4.1.jre12-shaded.jar"
jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
jdbc_connection_string => "jdbc:sqlserver://EC2AMAZ-J90JR4A\SQLEXPRESS:1433;databaseName=xxxx;"
jdbc_user => "xxx"
jdbc_password => "xxxx"
jdbc_paging_enabled => true
# Column whose last-seen value is persisted as :sql_last_value between runs.
tracking_column => modified_date
# Track the column value itself rather than the time of the last run.
use_column_value => true
# NOTE(review): clean_run => true resets sql_last_value on every pipeline start,
# so every run re-reads ALL rows — likely the cause of the reported full reload.
clean_run => true
tracking_column_type => "timestamp"
# Cron schedule: run the query once per minute.
schedule => "*/1 * * * *"
# :sql_last_value is interpolated into the WHERE clause for incremental loads.
statement => "Select pl.policynumber,pl.policyholdername,pl.dob,pl.age,pl.client_address clientaddress,cl.claimnumber,Cl.claimtype,cl.modified_date modified_date,Cl.is_active from policy pl
inner join claim Cl on Cl.policynumber=pl.policynumber where cl.modified_date >:sql_last_value"
}
}
filter {
    # Route each event to an Elasticsearch action based on the is_active flag:
    # active rows are (re)indexed, inactive rows are deleted from the index.
    if [is_active] {
        mutate {
            add_field => {
                "[@metadata][elasticsearch_action]" => "index"
            }
        }
    } else {
        mutate {
            add_field => {
                "[@metadata][elasticsearch_action]" => "delete"
            }
        }
    }
    # Cleanup common to both branches (was duplicated in each branch):
    # drop the routing flag and Logstash bookkeeping fields before output.
    mutate {
        remove_field => [ "is_active","@version","@timestamp" ]
    }
}
# Outputs: write to Elastic Cloud using the per-event action set in the filter,
# and mirror every event to stdout for debugging.
output {
elasticsearch {
hosts => "https://e5a4a4a4de7940d9b12674d62eac9762.eastus2.azure.elastic-cloud.com:9243"
# NOTE(review): credentials are hard-coded here; consider the Logstash keystore.
user => "elastic"
password => "xxxxx"
index => "xxxx"
# "index" or "delete", chosen per event by the filter above.
action => "%{[@metadata][elasticsearch_action]}"
# NOTE(review): document_type is deprecated in Elasticsearch 7.x — confirm ES version.
document_type => "_doc"
# Use the claim number as the document id so re-indexed rows overwrite in place.
document_id => "%{claimnumber}"

}
stdout { codec => rubydebug }
}

Screenshot attached below.

The date format seems to be wrong; because of that, each run picks up all the documents instead of only the modified ones. Could someone provide insight into this issue?


Solution

  • I think you need to remove or comment out the input parameter clean_run => true — it causes sql_last_value to be reset, which results in a complete data load every time.

    Adding the following additional parameter lets you debug and track how sql_last_value is being generated:

    last_run_metadata_path => "D:\logstash<version>\jdbc_lastrun\filename"
    

    In addition to that following is the typical input configuration of jdbc with optimal approach (prepared statement)

    # Recommended JDBC input using a prepared statement: sql_last_value is bound
    # as a query parameter instead of being interpolated into the SQL text.
    jdbc {  jdbc_driver_library => "D:/Users/xxxxx/Desktop/driver/mssql-jdbc-7.4.1.jre12-shaded.jar"
            jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
            jdbc_connection_string => "jdbc:sqlserver://EC2AMAZ-J90JR4A\SQLEXPRESS:1433;databaseName=xxxx;"
            jdbc_user => "xxx"
            jdbc_password => "xxxx"
            jdbc_paging_enabled => true
            statement => "Select pl.policynumber,pl.policyholdername,pl.dob,pl.age,pl.client_address clientaddress,cl.claimnumber,cl.claimtype,cl.modified_date modified_date,cl.is_active from policy pl inner join claim cl on cl.policynumber=pl.policynumber where cl.modified_date > (?)"
            use_prepared_statements => "true"
            # The single (?) placeholder above is bound to sql_last_value each run.
            prepared_statement_bind_values => [":sql_last_value"]
            prepared_statement_name => "jdbc_input_query1"
            tracking_column => modified_date
            # Required so sql_last_value tracks the column value, not the run time.
            use_column_value => true
            # clean_run stays disabled so sql_last_value persists across restarts.
            #clean_run => true
            # Valid values are "numeric" and "timestamp"; "date" is not accepted
            # by the jdbc input plugin, so use "timestamp" for a datetime column.
            tracking_column_type => "timestamp"
            schedule => "*/1 * * * *"
            # Persist sql_last_value here so its evolution can be inspected.
            last_run_metadata_path => "D:\logstash<version>\jdbc_lastrun\filename"
    }