I have a huge JSON file (tens of thousands of objects, more than 100 MB) that I'm trying to parse in order to extract specific objects. Because the file is this big, I'd like to deserialize only the specific part I need (if that's possible) without having to deserialize the whole file.
The object should be found based on the value of a specific property, "arena_id":xxxxx,
which is contained in every object. The objects are formatted like this (stripped-down version):
{"object":"card","id":"61a908e8-6952-46c0-94ec-3962b7a4caef","oracle_id":"e70f5520-1b9c-4351-8484-30f0dc692e01","multiverse_ids":[460007],"mtgo_id":71000,"arena_id":69421}
In order to deserialize the whole file I wrote the following code:
/// <summary>
/// Deserializes the entire "scryfall-default-cards.json" file into a <see cref="RootObject"/>.
/// </summary>
/// <remarks>
/// The JSON is read directly from the file stream instead of being loaded into one
/// large string first, so the raw text and the resulting object graph are not both
/// held in memory at once.
/// </remarks>
/// <returns>The object produced by deserializing the whole file.</returns>
public static RootObject GetCardFromBulkScryfall()
{
    // GetAppDataPath() is defined elsewhere; presumably it returns the directory holding the bulk file.
    string bulkFilePath = Path.Combine(GetAppDataPath(), @"scryfall-default-cards.json");

    using (StreamReader streamReader = new StreamReader(bulkFilePath))
    using (JsonTextReader jsonReader = new JsonTextReader(streamReader))
    {
        // CreateDefault() honours JsonConvert.DefaultSettings, like JsonConvert.DeserializeObject does.
        JsonSerializer serializer = JsonSerializer.CreateDefault();

        // JsonConvert.DeserializeObject rejects trailing content after the root value; keep that strictness.
        serializer.CheckAdditionalContent = true;

        return serializer.Deserialize<RootObject>(jsonReader);
    }
}
I'm not even sure whether what I'm trying to do is possible. If it isn't, my question is: what's the best approach to handling a file this big without having to deserialize it whole?
Use a JsonTextReader together with a JsonTextWriter to enumerate the objects one at a time, then deserialize only those whose property has the required value.
This code uses 16 MB of memory when working with a 112 MB JSON file on my PC.
Let me know if you have questions or need fixes.
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
namespace ConsoleApp1
{
/// <summary>
/// Console demo that scans a JSON file object by object and materializes only the
/// objects whose given property equals a requested value.
/// </summary>
class Program
{
    /// <summary>
    /// Searches "1.json" for objects whose "arena_id" equals 69421, then breaks into the
    /// debugger so the result can be inspected. Any exception message is printed to the console.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        try
        {
            string jsonFilePath = "1.json";
            string propName = "arena_id";
            RootObject[] objects = SearchObjectsWithProperty<RootObject, int>(jsonFilePath, propName, 69421, CancellationToken.None).ToArray();

            // Stop here so the matches in 'objects' can be inspected.
            System.Diagnostics.Debugger.Break();
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
            System.Diagnostics.Debugger.Break();
        }
    }

    /// <summary>
    /// Lazily enumerates the JSON objects in a file and yields, converted to
    /// <typeparamref name="T"/>, those whose property <paramref name="propName"/> equals
    /// <paramref name="propValue"/>.
    /// </summary>
    /// <remarks>
    /// Each object encountered by the reader is consumed together with its children, so
    /// nested objects are not tested on their own. Objects that do not contain the property,
    /// or whose property is JSON null, are skipped instead of aborting the scan.
    /// The file is opened when enumeration starts and closed when it ends or is abandoned.
    /// </remarks>
    /// <typeparam name="T">Type each matching object is converted to.</typeparam>
    /// <typeparam name="V">Type of the property value being compared.</typeparam>
    /// <param name="jsonFilePath">Path of the JSON file to scan.</param>
    /// <param name="propName">Name of the property to test on each object.</param>
    /// <param name="propValue">Value the property must equal for the object to be returned.</param>
    /// <param name="cancellationToken">Checked after every token read; cancels the scan.</param>
    /// <returns>A lazy sequence of the matching objects.</returns>
    /// <exception cref="OperationCanceledException">The token was cancelled during enumeration.</exception>
    static IEnumerable<T> SearchObjectsWithProperty<T, V>(string jsonFilePath, string propName, V propValue, CancellationToken cancellationToken) where V : IEquatable<V>
    {
        using (TextReader tr = File.OpenText(jsonFilePath))
        using (JsonTextReader jr = new JsonTextReader(tr))
        {
            // Reused for every object so only one object's JSON text is buffered at a time.
            StringBuilder currentObjectJson = new StringBuilder();

            while (jr.Read())
            {
                cancellationToken.ThrowIfCancellationRequested();

                if (jr.TokenType != JsonToken.StartObject)
                {
                    continue;
                }

                JObject obj = ReadCurrentObject(jr, currentObjectJson);

                // Not every object is guaranteed to carry the property; indexing it blindly
                // would dereference null and abort the whole scan.
                JToken token;
                if (!obj.TryGetValue(propName, out token) || token.Type == JTokenType.Null)
                {
                    continue;
                }

                // EqualityComparer uses IEquatable<V> and tolerates a null converted value.
                if (EqualityComparer<V>.Default.Equals(token.ToObject<V>(), propValue))
                {
                    yield return obj.ToObject<T>();
                }
            }
        }
    }

    /// <summary>
    /// Copies the object the reader is positioned on (including its children) into
    /// <paramref name="buffer"/> as JSON text and parses that text into a <see cref="JObject"/>.
    /// </summary>
    private static JObject ReadCurrentObject(JsonReader reader, StringBuilder buffer)
    {
        buffer.Clear();

        using (TextWriter tw = new StringWriter(buffer))
        using (JsonTextWriter jw = new JsonTextWriter(tw))
        {
            jw.WriteToken(reader);
        }

        // The writers are disposed above, so everything written is in the buffer.
        return JObject.Parse(buffer.ToString());
    }
}
/// <summary>
/// Type that matching JSON objects are converted to. Property names mirror the
/// keys of the stripped-down sample object, so no explicit name mapping is declared.
/// </summary>
public class RootObject
{
/// <summary>Mirrors the JSON key "object"; the @ prefix is required because object is a C# keyword.</summary>
public string @object { get; set; }
/// <summary>Mirrors the JSON key "id" (a GUID-formatted string in the sample).</summary>
public string id { get; set; }
/// <summary>Mirrors the JSON key "oracle_id" (a GUID-formatted string in the sample).</summary>
public string oracle_id { get; set; }
/// <summary>Mirrors the JSON key "multiverse_ids" (an array of integers in the sample).</summary>
public int[] multiverse_ids { get; set; }
/// <summary>Mirrors the JSON key "mtgo_id". Non-nullable, so presumably 0 when the key is absent — TODO confirm.</summary>
public int mtgo_id { get; set; }
/// <summary>Mirrors the JSON key "arena_id", the property the demo searches on.</summary>
public int arena_id { get; set; }
}
}