I am trying to figure out which Elasticsearch setup is best if my data grows to 658 million records per year. Currently I have a single index for all my records, with 2 shards and 0 replicas. I have also noticed that, with 356k records, sorting and searching in one index is faster than sorting and searching across 365 indexes that hold 1000 records each. The question is: what is the best and fastest way to store 658 million records in Elasticsearch if I need to sort, search, and delete records (or whole indexes) older than one year?
I am using Elasticsearch 6.2 with the JavaScript API.
// Page size used when a caller does not specify one.
const defaultPageSize = 10

// Index settings: two primary shards, no replicas.
// NOTE(review): `max_result_window` is set to one billion; presumably this is
// meant to allow arbitrarily deep `from`/`size` paging — confirm that the
// memory cost of deep paging is acceptable, or switch to scroll/search_after.
const indexTemplateSettings = {
  'number_of_shards': 2,
  'number_of_replicas': 0,
  'max_result_window': 1000000000,
  'index.routing.allocation.enable': 'all',
}
/**
 * Returns the Elasticsearch client used by this module.
 *
 * The client is built on first use from `config.elastic` (host, port, log
 * level and basic-auth credentials) and the same instance is handed out on
 * every later call. Building a fresh client per request, as was done before,
 * never released the previous instances and their connections.
 *
 * @returns {Object} The shared `elasticsearch.Client` instance.
 */
const createClient = (() => {
  // Kept in a closure so no extra module-level name is introduced.
  let sharedClient = null

  return () => {
    if (sharedClient === null) {
      sharedClient = new elasticsearch.Client({
        host: `${config.elastic.host}:${config.elastic.port}`,
        log: config.elastic.logLevel,
        httpAuth: `${config.elastic.userName}:${config.elastic.password}`,
      })
    }
    return sharedClient
  }
})()
/**
 * Fetches one page of documents from an Elasticsearch index.
 *
 * The returned promise always settles and never rejects: any failure (invalid
 * arguments, client or cluster error) is logged through `logger.error` and
 * reported to the caller as an empty page. Previously such failures either
 * left the promise pending forever or were dropped without being logged.
 *
 * @param {Object} options
 * @param {string} options.index - Index name; lower-cased before use.
 * @param {number} [options.skip=0] - Number of hits to skip (sent as `from`).
 * @param {number} [options.pageSize=defaultPageSize] - Page size (sent as
 *   `size`); a falsy value is passed on as `undefined`.
 * @param {string} [options.search] - Text to look for. When falsy, a
 *   `match_all` query is used.
 * @param {{by?: string, direction?: string}} [options.sort] - Field and
 *   direction to sort by; missing parts default to `timestamp` / `desc`.
 * @returns {Promise<{items: Object[], totalCount: number}>} The `_source` of
 *   each hit on the page plus the total hit count reported by Elasticsearch.
 */
export const get = async ({index, skip = 0, pageSize = defaultPageSize, search, sort = {by: 'timestamp', direction: 'desc'}}) => {
  try {
    logger.silly(`getting data from elastic: index: ${index}, skip: ${skip}, pageSize: ${pageSize}`)
    const client = createClient()

    // Default field by field so a partial `sort` (e.g. only `by`) still works
    // instead of throwing on `undefined.toLowerCase()`.
    const {by: sortBy = 'timestamp', direction: sortDirection = 'desc'} = sort || {}
    const sortSettings = {
      order: `${sortDirection.toLowerCase()}`,
      missing: '_last',
      unmapped_type: 'long',
    }

    const params = {
      from: skip,
      size: pageSize || undefined,
      index: `${index.toLowerCase()}`,
      filter_path: 'hits.hits._source, hits.total',
      body: {
        query: {'match_all': {}},
        // Both sub-fields are listed; `unmapped_type` is presumably there so
        // that whichever one is not mapped for this field is ignored — confirm
        // against the index mapping.
        sort: {
          [`${sortBy}.keyword`]: sortSettings,
          [`${sortBy}.seconds`]: sortSettings,
        },
      },
    }

    if (search) {
      // NOTE(review): `search` is interpolated verbatim into query_string
      // syntax, so reserved characters in it act as query operators.
      params.body.query = {
        query_string: {
          query: `*${search}* OR *${search}`,
          analyze_wildcard: true,
        },
      }
    }

    // Called without a callback so the client returns a promise and errors
    // reach the catch block below.
    const response = await client.search(params)
    const {hits: {hits: data = [], total: totalCount} = {hits: [], total: 0}} = response || {}
    logger.silly(`elastic searching completed. Result: contains ${totalCount} items`)
    return {items: data.map(t => t._source), totalCount}
  } catch (e) {
    logger.error(e)
    return {items: [], totalCount: 0}
  }
}
/**
 * Indexes `message` into the index named after `type` and triggers a purge of
 * documents in that index that are more than one year old.
 *
 * Each document is stamped with today's date in `_timestampIndex`
 * (`YYYY-MM-DD`, taken from `toISOString()`); the purge deletes documents
 * whose `_timestampIndex` is on or before the same day one year ago. The stamp
 * used to be the fixed date `2016-04-27`, which made every stored document
 * immediately eligible for deletion by the next purge.
 *
 * The returned promise always settles and never rejects: failures are logged
 * through `logger.error` and reported as `undefined`. Previously a thrown
 * error left the promise pending forever and client errors were ignored.
 *
 * NOTE(review): running a delete-by-query on every push is likely expensive
 * at the volumes discussed; consider a scheduled purge or time-based indices.
 *
 * @param {Object} message - Document fields to store.
 * @param {string} type - Mapping type; its lower-cased form is the index name.
 * @returns {Promise<Object|undefined>} The client's index response, or
 *   `undefined` when indexing failed.
 */
export const push = async (message, type) => {
  try {
    const client = createClient()
    const index = type.toLowerCase()

    const now = new Date()
    const cutoff = new Date(now)
    cutoff.setFullYear(cutoff.getFullYear() - 1)
    const oneYearAgoTime = cutoff.toISOString().substring(0, 10)
    const indexCreationTime = now.toISOString().substring(0, 10)

    // Deliberately not awaited: as before, the purge must not delay the
    // indexing call. Its outcome is only logged.
    client.deleteByQuery({
      index,
      body: {
        query: {
          range: {
            '_timestampIndex': {
              lte: oneYearAgoTime,
            },
          },
        },
      },
    })
      .then(response => logger.silly('Deleted of data completed', response))
      .catch(error => logger.error(error))

    const response = await client.index({
      index,
      type,
      body: {
        ...message,
        _timestampIndex: indexCreationTime,
      },
    })
    logger.silly('Pushing of data completed', response)
    return response
  } catch (e) {
    logger.error(e)
    return undefined
  }
}