Am trying to turn all scalar values in any given JSON into an empty string.
Given i have a JSON column called arbitraryjsonvalues in mysql database table called ITEMS and the arbitraryjsonvalues column have the following values.
arbitraryjsonvalues
===================
{"foo": {"big": {"cat": 16, "dog": 90}, "boo": ["babe1", "babe2", "babe3"], "small": ["paradise", "hell"]}, "goo": "heavens gate", "yeah": "rice pot on fire"}
{"foo": {"big": {"cone": 90, "cylinder": 16}, "lover": ["lover1", "lover2", "lover3"], "small": ["banner", "logo"]}, "yeah": "pizza on the table"}
{"foo": {"big": {"ape": 16, "humans": 90}, "kalo": ["kalo1", "kalo2", "kalo3"], "small": ["paradise", "hell"]}, "goo": "heavens gate", "yeah": "rice pot on fire", "freak": "bed"}
{"fcs": ["arsenal", "chelsea", "man utd", "leicester", "inter milan"], "foo": {"big": {"ape": 16, "humans": 90}, "kalo": ["kalo1", "kalo2", "kalo3"], "small": ["paradise", "hell"]}, "goo": "heavens gate", "yeah": "rice pot on fire", "freak": "bed"}
{"a": "dsd"}
{"foo": {"ll": true, "boo": ["", "", {"cc": {"dd": ["", true, "", 43], "gg": true}, "jj": "fu"}, "", 90, false, true]}, "lls": 90, "mmn": 9, "bbbd": "ad_a", "lulu": "adfdasf", "_alago": {"a": 4}}
{"phone": "+234809488485"}
{"foo": {"big": {"cat": 16, "dog": 90}, "boo": ["", "", ""], "small": ["http://koks.com", ""]}, "goo": "+2345554444 677888", "yeah": "rice pot on fire"}
{"ll": true, "boo": ["http://kij.com", "___89jjjjdhfhhf8jjd__", {"cc": {"dd": ["", true, "", 43], "gg": true}, "jj": "fu"}, "", 90, false, true]}
{"ll": true, "boo": ["http://kij.com", "___+++++89jjjjdhfhhf8jjd__", {"cc": {"dd": ["", true, "", 43], "gg": true}, "jj": "fu"}, "", 90, false, true]}
{"ll": true, "boo": ["http://kij.com", "___+++++", {"cc": {"dd": ["", true, "", 43], "gg": true}, "jj": "fu"}, "", 90, false, true]}
{"gg": ["a", {"ll": "pink"}]}
{"gg": ["a", {"ll": ["pink", "orange"]}]}
I have created the following stored procedure to enable me merge all json values in a JSON column.
MERGE_ALL_JSON Procedure
DELIMITER $$
CREATE DEFINER=`root`@`%` PROCEDURE `MERGE_ALL_JSON`(IN `$LIMIT` INT)
BEGIN
DECLARE `$LIST` LONGTEXT;
DECLARE `$REMAINING` INT;
DECLARE `$PAGE` INT;
DECLARE `$TOTAL_JSON_OBJECT_COUNT` INT;
DECLARE `$OFFSET` INT;
DECLARE `$NEXT` TEXT;
DECLARE `$NEXT_LENGTH` INT;
DECLARE `$VALUE` TEXT;
DECLARE `$COUNTER` INT;
DECLARE `$MERGED_JSON` JSON;
SET `$MERGED_JSON` = '{}';
SET `$OFFSET` = 1;
CALL
GET_TOTAL_JSON_OBJECT_COUNT(`$TOTAL_JSON_OBJECT_COUNT`);
CALL CALCULATE_PAGE_COUNT(
`$LIMIT`,`$TOTAL_JSON_OBJECT_COUNT`,`$PAGE`
);
WHILE `$OFFSET` <= `$PAGE`
DO
CALL GET_JSON_LIST(`$LIMIT`, `$OFFSET`, `$LIST`);
SET `$COUNTER` = 0;
SELECT `$LIST`;
iterator:
LOOP
IF CHAR_LENGTH(TRIM(`$LIST`)) = 0 OR `$LIST` IS NULL
THEN
LEAVE iterator;
END IF;
SET `$NEXT` = SUBSTRING_INDEX(`$LIST`,"__|__',",1);
SET `$NEXT_LENGTH` = CHAR_LENGTH(`$NEXT`);
SET `$COUNTER` = `$COUNTER` + 1;
SET `$REMAINING` = (`$TOTAL_JSON_OBJECT_COUNT` - ((`$OFFSET` - 1)*`$LIMIT`));
IF `$OFFSET` = `$PAGE` AND `$COUNTER` = `$REMAINING` THEN
SET `$NEXT` = SUBSTRING_INDEX(`$NEXT`, "__|__'", 1);
END IF;
SET `$VALUE` = CONCAT(TRIM(`$NEXT`), "'");
SET `$VALUE` = substring(`$VALUE`, 2, length(`$VALUE`) - 2);
SET `$MERGED_JSON` = JSON_MERGE_PATCH(
`$MERGED_JSON`,`$VALUE`
);
SET `$LIST` = INSERT(`$LIST`,1,`$NEXT_LENGTH` + CHAR_LENGTH("__|__',"),'');
END LOOP;
SET `$OFFSET` = `$OFFSET` + 1;
END WHILE;
SELECT `$MERGED_JSON`;
END$$
DELIMITER ;
GET_JSON_LIST Procedure
DELIMITER $$
CREATE DEFINER=`root`@`%` PROCEDURE `GET_JSON_LIST`(IN `$LIMIT` INT, IN `$OFFSET` INT, OUT `$LIST` LONGTEXT)
BEGIN
DECLARE `$NEWOFFSET` INT;
SET `$NEWOFFSET` = (`$OFFSET`-1)*`$LIMIT`;
SET @t = @@group_concat_max_len;
SET @@group_concat_max_len = 4294967295899999;
SET `$LIST` = (SELECT
GROUP_CONCAT(
(SELECT DISTINCT
CONCAT(
"'",
arbitraryjsonvalues,
"__|__'"
))
)
FROM (
SELECT DISTINCT arbitraryjsonvalues
FROM ITEMS
WHERE arbitraryjsonvalues != JSON_OBJECT() AND
JSON_TYPE(arbitraryjsonvalues) = "OBJECT"
LIMIT `$NEWOFFSET`, `$LIMIT`
) as jsonvalues);
SET @@group_concat_max_len = @t;
END$$
DELIMITER ;
GET_TOTAL_JSON_OBJECT_COUNT Procedure
DELIMITER $$
CREATE DEFINER=`root`@`%` PROCEDURE `GET_TOTAL_JSON_OBJECT_COUNT`(OUT `$TOTAL_JSON_OBJECT_COUNT` INT)
BEGIN
SELECT COUNT(*) FROM (
SELECT DISTINCT arbitraryjsonvalues
FROM ITEMS
WHERE JSON_TYPE(arbitraryjsonvalues) = "OBJECT" AND
arbitraryjsonvalues != JSON_OBJECT()
) as distinctcount INTO `$TOTAL_JSON_OBJECT_COUNT`;
END$$
DELIMITER ;
finally, CALCULATE_PAGE_COUNT Procedure
DELIMITER $$
CREATE DEFINER=`root`@`%` PROCEDURE `CALCULATE_PAGE_COUNT`(IN `$LIMIT` INT, IN `$TOTAL` INT, OUT `$PAGE` INT)
BEGIN
DECLARE `$REMAINDER` INT;
SET `$PAGE` = `$TOTAL` / `$LIMIT`;
SET `$REMAINDER` = `$TOTAL` - (`$PAGE`*`$LIMIT`);
IF `$REMAINDER` > 0 THEN
SET `$PAGE` = `$PAGE` + 1;
END IF;
END$$
DELIMITER ;
However, I discovered that two JSON values with the same structure but different scalar values are distinct to one another, which means an attempt to merge all values in JSON column may fetch too many values ranging to thousands or more for processing; meanwhile, each of this JSON values will possibly not vary that much in structure, so I believe it will be good to turn all scalar values into an empty string, which is the problem I am currently trying to solve.
Afterward, I will be looking to turn series of empty strings in an array into a single empty string, for example
["", "", "", {....}, "", ""] will be equal to ["", {}]
["", "", "", {"a": ["", ""]}, "", ""] will be equal to ["", {"a": [""]}]
So if I can solve this two problems, which is to have a regex to turn all the scalar values into an empty string and a regex to turn series of empty string as described above, I will be able to figure out where I can use the regex in MYSQL statement of the above procedures to fetch distinct json values.
I believe by doing so, i will be able to fetch not too many distinct values. and my goal is to merge any given JSON column in a split of seconds. I don't care to have the scalar values, all i wanted is for my backend to automatically determine the structure and send it to the frontend to display a tree or collapsing tree where each node can have a text field to allow for a more context-specific search.
So whenever a node is searched, the frontend is aware of the path and the backend knows where to get the data...
Trying to implement some sort of reusable backend for searching JSON column of any given mysql database table column with JSON type.
I am using the following regex below to perform this action but it matches not only the scalar values but also the key. wish to get one that will match only the scalar values and not the keys.
true|false|\d+|("(?:(?!")((?:\\"|[^"])*))*")
Given that I have this merged JSON value for testing
{
"a": "dsd",
"ll": [true, true, true],
"boo": [
"http://kij.com",
"___89jjjjdhfhhf8jjd__",
{
"cc": {
"dd": ["", true, "", 43],
"gg": true
},
"jj": "f'u"
},
"",
90,
false,
true,
"http://kij.com",
"___+++++89jjjjdhfhhf8jjd__",
{
"cc": {
"dd": ["", true, "", 43],
"gg": true
},
"jj": "fu"
},
"",
90,
false,
true,
"http://kij.com",
"___+++++",
{
"cc": {
"dd": ["", true, "@8jkk=", 43],
"gg": true
},
"jj": "fu@"
},
"",
90,
false,
true
],
"fcs": ["arsenal", "chelsea", "man utd", "leicester", "inter milan"],
"foo": {
"ll": true,
"big": {
"ape": [16, 16],
"cat": [16, 16],
"dog": [90, 90],
"cone": 90,
"humans": [90, 90],
"cylinder": 16
},
"boo": ["babe1", "babe2", "babe3", "", "", {
"cc": {
"dd": ["", true, "", 43],
"gg": true
},
"jj": "fu"
}, "", 90, false, true, "", "", ""],
"kalo": ["kalo1", "kalo2", "kalo3", "kalo1", "kalo2", "kalo3"],
"lover": ["lover1", "lover2", "lover3"],
"small": ["paradise", "hell", "banner", "logo", "paradise", "hell", "paradise", "hell", "http://koks.com", ""]
},
"goo": ["heavens gate", "heavens gate", "heavens gate", "+2345554444 677888"],
"lls": 90,
"mmn": 9,
"bbbd": "ad_a",
"lulu": "adfdasf",
"yeah": ["rice pot on fire", "pizza on the table", "rice pot on fire", "rice pot on fire", "rice pot on fire"],
"freak": ["bed", "bed"],
"phone": "+2347777777",
"_alago": {"a": "$4m-jkk+=$900"}
}
Here is a link for you to test it
Please i need someone to help me solve this problem
after digging this is what I've come up with..
GET_JSON_LIST
procedure
DELIMITER $$
CREATE DEFINER=`root`@`%` PROCEDURE `GET_JSON_LIST`(IN `$LIMIT` INT, IN `$OFFSET` INT, OUT `$LIST` LONGTEXT)
BEGIN
DECLARE `$NEWOFFSET` INT;
SET `$NEWOFFSET` = (`$OFFSET`-1)*`$LIMIT`;
SET @t = @@group_concat_max_len;
SET @@group_concat_max_len = 4294967295899999;
SET @LIST = '{}';
SET @TEMP=(
SELECT GROUP_CONCAT(
@LIST:=JSON_MERGE_PRESERVE(
@LIST,
jsonvalues
)
)
FROM (
SELECT DISTINCT REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
arbitraryjsonvalues,
'"(\\w+)":',
'$1:'
),
'(true|false|\\d+|("(?:(?!")((?:\\\\"|[^"])*))*"))',
'""'
),
'(\\w*):',
'"$1":'
),
'(?:""\\s*,\\s*){2,}',
'"",'
),
'(?:"",\\s*"")+]',
'""]'
) as jsonvalues
FROM ITEMS
WHERE JSON_TYPE(arbitraryjsonvalues) = "OBJECT" AND arbitraryjsonvalues != JSON_OBJECT()
LIMIT `$NEWOFFSET`, `$LIMIT`
) AS arbitval2
);
SET @@group_concat_max_len = @t;
SET `$LIST` = REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
@LIST,
'"(\\w+)":',
'$1:'
),
'(true|false|\\d+|("(?:(?!")((?:\\\\"|[^"])*))*"))',
'""'
),
'(\\w*):',
'"$1":'
),
'(?:""\\s*,\\s*){2,}',
'"",'
),
'(?:"",\\s*"")+]',
'""]'
);
END$$
DELIMITER ;
GET_TOTAL_JSON_OBJECT_COUNT
Procedure
DELIMITER $$
CREATE DEFINER=`root`@`%` PROCEDURE `GET_TOTAL_JSON_OBJECT_COUNT`(OUT `$TOTAL_JSON_OBJECT_COUNT` INT)
BEGIN
SELECT COUNT(*)
FROM (
SELECT DISTINCT REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
arbitraryjsonvalues,
'"(\\w+)":',
'$1:'
),
'(true|false|\\d+|("(?:(?!")((?:\\\\"|[^"])*))*"))',
'""'
),
'(\\w*):',
'"$1":'
),
'(?:""\\s*,\\s*){2,}',
'"",'
),
'(?:"",\\s*"")+]',
'""]'
) AS total
FROM ITEMS
WHERE arbitraryjsonvalues != JSON_OBJECT() AND arbitraryjsonvalues != JSON_ARRAY()
) as distinctcount INTO `$TOTAL_JSON_OBJECT_COUNT`;
END$$
DELIMITER ;
MERGE_ALL_JSON
Procedure
DELIMITER $$
CREATE DEFINER=`root`@`%` PROCEDURE `MERGE_ALL_JSON`(IN `$LIMIT` INT)
BEGIN
DECLARE `$LIST` LONGTEXT;
DECLARE `$PAGE` INT;
DECLARE `$TOTAL_JSON_OBJECT_COUNT` INT;
DECLARE `$OFFSET` INT;
DECLARE `$NEXT` TEXT;
DECLARE `$NEXT_LENGTH` INT;
DECLARE `$VALUE` TEXT;
DECLARE `$MERGED_JSON` JSON;
SET `$MERGED_JSON` = '{}';
SET `$OFFSET` = 1;
CALL
GET_TOTAL_JSON_OBJECT_COUNT(`$TOTAL_JSON_OBJECT_COUNT`);
CALL CALCULATE_PAGE_COUNT(`$LIMIT`,`$TOTAL_JSON_OBJECT_COUNT`,`$PAGE`);
WHILE `$OFFSET`<=`$PAGE`
DO
CALL GET_JSON_LIST(`$LIMIT`, `$OFFSET`, `$LIST`);
SET `$MERGED_JSON` = JSON_MERGE_PRESERVE(`$MERGED_JSON`,`$LIST`);
SET `$OFFSET`=(`$OFFSET`+1);
END WHILE;
SELECT `$MERGED_JSON`;
END$$
DELIMITER ;
CALCULATE_PAGE_COUNT
Procedure
DELIMITER $$
CREATE DEFINER=`root`@`%` PROCEDURE `CALCULATE_PAGE_COUNT`(IN `$LIMIT` INT, IN `$TOTAL` INT, OUT `$PAGE` INT)
BEGIN
DECLARE `$REMAINDER` INT;
SET `$PAGE` = `$TOTAL` / `$LIMIT`;
SET `$REMAINDER` = `$TOTAL` - (`$PAGE`*`$LIMIT`);
IF `$REMAINDER` > 0 THEN
SET `$PAGE` = `$PAGE` + 1;
END IF;
END$$
DELIMITER ;
Running MERGE_ALL_JSON
Procedure produces the result below, and this solves my problem somehow, but I am looking for a way to merge all objects inside an array into one with REGEX_REPLACE and JSON_MERGE_PRESERVE.
{
"a": "",
"gg": ["", {"ll": ""}, "", {"ll": [""]}],
"ll": "", "boo": ["", {"cc": {"dd": [""], "gg": ""}, "jj": ""}, ""],
"fcs": [""],
"foo": [
{
"ll": "",
"big": {
"ape": [""],
"cat": [""],
"dog": [""],
"cone": "",
"humans": [""],
"cylinder": ""
},
"boo": ["", {"cc": {"dd": [""], "gg": ""}, "jj": ""}, ""],
"kalo": [""],
"lover": [""],
"small": [""]
},
"",
{"bag": ""}
],
"goo": [""],
"lls": "",
"mmn": "",
"bbbd": "",
"lulu": "",
"yeah": ["", {"jj": ""}, "", {"jj": ""}, ""],
"freak": [""],
"light": "",
"phone": "",
"_alago": {"a": ""}
}
Let me drop this answer here, it might help someone.