I am trying to transform an AVRO schema into an ElasticSearch index template. Both are JSON structured with a few things to check while transforming. I tried using recursion to get all the nested elements out and then pair them with their parents but writing to a dictionary while parsing deep with recursion compelled me to ask this question.
So basically I have this AVRO schema file:
{
"name": "animal",
"type": [
"null",
{
"type": "record",
"name": "zooAnimals",
"fields": [{
"name": "color",
"type": ["null", "string"],
"default": null
},
{
"name": "skinType",
"type": ["null", "string"],
"default": null
},
{
"name": "species",
"type": {
"type": "record",
"name": "AnimalSpecies",
"fields": [{
"name": "terrestrial",
"type": "string"
},
{
"name": "aquatic",
"type": "string"
}
]
}
},
{
"name": "behavior",
"type": [
"null",
{
"type": "record",
"name": "AnimalBehaviors",
"fields": [{
"name": "sound",
"type": ["null", "string"],
"default": null
},
{
"name": "hunt",
"type": ["null", "string"],
"default": null
}
]
}
],
"default": null
}
]
}
]
}
and I would like it to get transformed into this (Elasticsearch index template format):
{
"properties": {
"color" :{
"type" : "keyword"
},
"skinType" :{
"type" : "keyword"
},
"species" :{
"properties" : {
"terrestrial" : {
"type" : "keyword"
},
"aquatic" : {
"type" : "keyword"
}
}
},
"behavior" : {
"properties" : {
"sound" : {
"type" : "keyword"
},
"hunt" : {
"type" : "keyword"
}
}
}
}
}
Important Notes: The nesting on the AVRO schema could be furthermore nested and that's why I was thinking recursion to solve. Also, the type of the type
field could be an Array
or a Map
as shown for behavior
vs. species
where behavior has an array and species has a map.
If you must see that I did my trial and error, here's my code that's not getting me anywhere:
// (Question author's attempt, reproduced as posted.)
// Recursively walks an AVRO schema fragment, trying to collect each
// field into a shared `mappings.properties` object.
// NOTE(review): `dataType` and `mappings` are never declared in this
// snippet — they are implicit globals (or defined elsewhere); verify.
// NOTE(review): `.map` is used purely for side effects and its result
// is discarded — `forEach` (or building and returning a value) would
// express the intent better.
// NOTE(review): the snippet is truncated as posted — the closing
// braces of the `if` and of the function itself are missing.
const checkDataTypeFromObject = function (obj) {
// true only for real arrays (e.g. a "fields" list or a union "type")
if (Object.prototype.toString.call(obj) === "[object Array]") {
obj.map(function (item) {
// true only for plain objects (record definitions, field entries)
if (Object.prototype.toString.call(item) === "[object Object]") {
// so this is an object that could contain further nested fields
dataType = item;
// NOTE(review): this writes every nested field to the same top-level
// `mappings.properties`, losing the parent/child nesting — likely the
// core problem being asked about.
mappings.properties[item.name] = { "type" : item.type}
if (item.hasOwnProperty("fields")) {
// a record: recurse into its field list
checkDataTypeFromObject(item.fields);
} else if (item.hasOwnProperty("type")) {
// a field: recurse into its (possibly union/record) type
checkDataTypeFromObject(item.type);
}
} else if (item === null) {
// discard the nulls, nothing to do here
} else {
// if not dict or null, this is the dataType we are looking for
dataType = item;
}
return item.name
});
We can break it down using inductive reasoning. The numbered points below correspond to the numbered comments in the code -

1. If t is "null", return an empty object
2. t is not "null". If t.type is an object, transform each leaf and sum into a single object
3. t is not "null" and t.type is not an object. If t.fields is an object, transform each leaf, assign to { [name]: ... }, and sum into a single properties object
4. t is not "null", t.type is not an object, and t.fields is not an object. Return keyword.

const transform = t =>
  t === "null"
    ? {} // <- 1
    : isObject(t.type)
      ? arr(t.type) // <- 2
          .map(transform)
          .reduce(assign, {})
      : isObject(t.fields)
        ? { properties: // <- 3
            arr(t.fields)
              .map(v => ({ [v.name]: transform(v) }))
              .reduce(assign, {})
          }
        : { type: "keyword" } // <- 4
With a few helpers to keep complexity out of our way -
// Shallow-merge `source` into `target` and return `target`
// (mutating fold step for `.reduce(assign, {})`).
function assign(target, source) {
  return Object.assign(target, source);
}

// Normalize a value to an array: arrays pass through, anything else
// is wrapped in a one-element array.
function arr(value) {
  if (Array.isArray(value)) {
    return value;
  }
  return [value];
}

// True for any non-primitive (objects, arrays, functions); false for
// null, undefined, and all other primitives.
function isObject(value) {
  return Object(value) === value;
}
Simply run the transform
-
console.log(transform(input))
Expand the snippet below to verify the result in your browser -
// Shallow-merge u into t and return t (fold step for reduce).
const assign = (t, u) =>
  Object.assign(t, u)

// Normalize to an array so unions (arrays) and single types (objects)
// can be handled by one code path.
const arr = t =>
  Array.isArray(t) ? t : [t]

// True for any non-primitive; false for null/undefined and primitives.
const isObject = t =>
  Object(t) === t

// Recursively transform an AVRO schema node into an Elasticsearch
// mapping fragment. Numbered cases:
// 1. the "null" union member contributes nothing
// 2. a complex `type` (union array or record object): transform each
//    member and merge the results
// 3. a record with `fields`: build a nested { properties: ... } object
// 4. a primitive leaf: map to keyword
// (fixed: the key was misspelled "propertries" — Elasticsearch
// mappings require "properties")
const transform = t =>
  t === "null"
    ? {} // <- 1
    : isObject(t.type)
      ? arr(t.type) // <- 2
          .map(transform)
          .reduce(assign, {})
      : isObject(t.fields)
        ? { properties: // <- 3
            arr(t.fields)
              .map(v => ({ [v.name]: transform(v) }))
              .reduce(assign, {})
          }
        : { type: "keyword" } // <- 4
const input =
{name: "animal", type: ["null", {type: "record", name: "zooAnimals", fields: [{name: "color", type: ["null", "string"], default: null}, {name: "skinType", type: ["null", "string"], default: null}, {name: "species", type: {type: "record", name: "AnimalSpecies", fields: [{name: "terrestrial", type: "string"}, {name: "aquatic", type: "string"}]}}, {name: "behavior", type: ["null", {type: "record", name: "AnimalBehaviors", fields: [{name: "sound", type: ["null", "string"], default: null}, {name: "hunt", type: ["null", "string"], default: null}]}], default: null}]}]}
console.log(transform(input))
Output -
{
"propertries": {
"color": {
"type": "keyword"
},
"skinType": {
"type": "keyword"
},
"species": {
"propertries": {
"terrestrial": {
"type": "keyword"
},
"aquatic": {
"type": "keyword"
}
}
},
"behavior": {
"propertries": {
"sound": {
"type": "keyword"
},
"hunt": {
"type": "keyword"
}
}
}
}
}
nota bene
In step 2 we could have a complex type
such as -
{ name: "foo"
, type: [ "null", { obj1 }, { obj2 }, ... ]
, ...
}
In such a case, obj1
and obj2
may each transform
into a { properties: ... }
object. Using .reduce(assign, {})
means properties of obj1
will be overwritten by properties of obj2
-
: isObject(t.type)
? arr(t.type)
.map(transform)
.reduce(assign, {}) // <- cannot simply use `assign`
To remedy this, we change step 2 to merge
complex types more intelligently -
: isObject(t.type)
? arr(t.type)
.map(transform)
.reduce(merge, {}) // <- define a more sophisticated merge
Where merge
could be something like -
// Merge two transformed fragments, keeping nested mappings grouped
// under a single `properties` key instead of letting one overwrite
// the other. Four cases: both sides have properties, only the left,
// only the right, or neither.
function merge(left, right) {
  if (left.properties && right.properties) { // <- both
    return { properties: Object.assign(left.properties, right.properties) };
  }
  if (left.properties) { // <- only left
    return { properties: Object.assign(left.properties, right) };
  }
  if (right.properties) { // <- only right
    return { properties: Object.assign(left, right.properties) };
  }
  return Object.assign(left, right); // <- neither
}
Or the same merge
but using a different logical approach -
// Same merge, expressed by checking whether either side carries a
// `properties` object; `x.properties || x` falls back to the whole
// fragment when it has no nested properties.
// (fixed: the arrow was mistyped as `=.` — must be `=>`)
const merge = (t, u) =>
  t.properties || u.properties // <- either
    ? { properties:
        Object.assign
          ( t.properties || t
          , u.properties || u
          )
      }
    : Object.assign(t, u) // <- neither