skip to Main Content

I have a huge (approx. 50GB) JSON file to deserialize. The JSON file consists of 14 arrays, and a short example of it can be found here.

I wrote my POCO file, declaring 15 classes (one for each array, and a root class) and now I am trying to get my data in. Since the original data are huge and come in a zip file I am trying not to unpack the whole thing. Hence, the use of IO.Compression in the following code.

using System.IO.Compression;
using System.Text.Json;
using System.Text.Json.Nodes;

namespace read_and_parse
{
    // Console program that opens a zip archive and starts JSON parsing for selected entries.
    internal class Program
    {
        // Opens the archive at zip_path and, for every entry whose name passes the
        // numeric check below, calls ParseJsonFromZippedFile on that entry.
        static void Main() 
        {
            var fc = new Program();

            string zip_path = @"C:ProjectsBBRDownload_Totalexample_json.zip";
            using FileStream file = File.OpenRead(zip_path);
            using (var zip = new ZipArchive(file, ZipArchiveMode.Read))
            {
                foreach (ZipArchiveEntry entry in zip.Entries)
                {

                    // Take the text after the last '_' in the entry name and drop its final five
                    // characters (presumably the ".json" extension — confirm); the entry is
                    // processed only when the remainder parses as a long.
                    string[] name_split = entry.Name.Split('_');
                    string name = name_split.Last().Substring(0, name_split.Last().Length - 5);
                    bool canConvert = long.TryParse(name, out long number1);
                    if (canConvert == true)
                    {
                        // The returned Task is stored in a local; nothing in Main awaits it.
                        Task task = fc.ParseJsonFromZippedFile(entry);
                    }
                }
            }
        }

        // Opens the entry's stream, enumerates it with
        // JsonSerializer.DeserializeAsyncEnumerable<JsonNode>, and for each element that has a
        // "BBRSaglist" property deserializes that property into a BBRSagList.
        private async Task ParseJsonFromZippedFile(ZipArchiveEntry entry)
        {
            JsonSerializerOptions options = new JsonSerializerOptions { PropertyNamingPolicy = JsonNamingPolicy.CamelCase };
            await using Stream entryStream = entry.Open();

            IAsyncEnumerable<JsonNode?> enumerable = JsonSerializer.DeserializeAsyncEnumerable<JsonNode>(entryStream, options);
            await foreach (JsonNode? obj in enumerable) 
            {
                // Parse only subset of the object
                JsonNode? bbrSagNode = obj?["BBRSaglist"];
                if (bbrSagNode is null) continue;
                else
                {
                    // The deserialized value is assigned to a local and not used afterwards.
                    var bbrSag = bbrSagNode.Deserialize<BBRSagList>();                    
                }
            }

        }

    }
}

Unfortunately I do not get anything out of it and it fails in the foreach-loop of the task. It fails with a System.Threading.Tasks.VoidTaskResult.

How do I get the data deserialized?

3

Answers


  1. Due to my company's security policy I can't access the example data. Please check whether your JSON has no root element, i.e. whether it is just a JSON array/list …

    I made an example. Try using a ToListAsync extension method;
    you can then deserialize each object and add it to a main list, etc.

    // Demo entry point (LINQPad-style script: relies on the Dump() extension being available).
    // Parses a small JSON array, re-encodes it into an in-memory UTF-8 stream, reads that stream
    // back with JsonSerializer.DeserializeAsyncEnumerable<JsonNode>, and dumps every element
    // labelled with its "name" property.
    void Main()
    {
        JsonSerializerOptions options = new JsonSerializerOptions { PropertyNamingPolicy = JsonNamingPolicy.CamelCase };

        // The inner double quotes must be escaped; without the backslashes the literal does not compile.
        var jobj = JsonNode.Parse("[{\"name\":\"Tom Cruise\",\"age\":56,\"Born At\":\"Syracuse, NY\",\"Birthdate\":\"July 3, 1962\",\"photo\":\"https://jsonformatter.org/img/tom-cruise.jpg\"},{\"name\":\"Robert Downey Jr.\",\"age\":53,\"Born At\":\"New York City, NY\",\"Birthdate\":\"April 4, 1965\",\"photo\":\"https://jsonformatter.org/img/Robert-Downey-Jr.jpg\"}]");

        var jStream = new System.IO.MemoryStream(System.Text.Encoding.UTF8.GetBytes(jobj.ToJsonString()));

        // Main is synchronous, so the list is materialized inside Task.Run and the
        // result is blocked on below via .Result.
        var _enumerable = Task.Run(() => System.Text.Json.JsonSerializer.DeserializeAsyncEnumerable<JsonNode>(jStream, options).ToListAsync());
        foreach (JsonNode obj in _enumerable.Result)
        {
            obj.Dump(obj["name"].ToString());
        }
    }
    
    
    /// <summary>
    /// Helpers for consuming <see cref="IAsyncEnumerable{T}"/> sequences.
    /// </summary>
    public static class AsyncEnumerableExtensions
    {
        /// <summary>
        /// Enumerates <paramref name="items"/> to the end and returns every element, in order, as a list.
        /// </summary>
        /// <param name="items">The asynchronous sequence to drain.</param>
        /// <param name="cancellationToken">Token handed to the enumeration via <c>WithCancellation</c>.</param>
        /// <returns>A task producing a new list with all elements of the sequence.</returns>
        public static async Task<List<T>> ToListAsync<T>(this IAsyncEnumerable<T> items,
            CancellationToken cancellationToken = default)
        {
            List<T> collected = new List<T>();

            var configuredSource = items
                .WithCancellation(cancellationToken)
                .ConfigureAwait(false);

            await foreach (T element in configuredSource)
            {
                collected.Add(element);
            }

            return collected;
        }
    }
    

    enter image description here

    Login or Signup to reply.
  2. The Main method should be marked as async, and you need to await the task returned by ParseJsonFromZippedFile:

    using System.IO.Compression;
    using System.Text.Json;
    using System.Text.Json.Nodes;
    using System.Threading.Tasks;
    
    namespace read_and_parse
    {
        /// <summary>
        /// Console program that opens a zip archive and parses the JSON entries whose
        /// names end in a numeric segment.
        /// </summary>
        internal class Program
        {
            // Number of trailing characters removed from the last '_'-separated name segment
            // before the numeric check (presumably the ".json" extension — confirm).
            private const int SuffixLength = 5;

            /// <summary>
            /// Opens the archive and awaits <see cref="ParseJsonFromZippedFile"/> for every
            /// entry accepted by <see cref="HasNumericLastSegment"/>, one entry at a time.
            /// </summary>
            static async Task Main() // Mark Main as async
            {
                var fc = new Program();

                // NOTE(review): this path looks like it lost its directory separators — confirm.
                string zip_path = @"C:ProjectsBBRDownload_Totalexample_json.zip";
                using FileStream file = File.OpenRead(zip_path);
                using (var zip = new ZipArchive(file, ZipArchiveMode.Read))
                {
                    foreach (ZipArchiveEntry entry in zip.Entries)
                    {
                        if (!HasNumericLastSegment(entry.Name))
                        {
                            continue;
                        }

                        await fc.ParseJsonFromZippedFile(entry); // Await the task
                    }
                }
            }

            /// <summary>
            /// Returns true when the text after the last '_' in <paramref name="entryName"/>,
            /// minus its final <see cref="SuffixLength"/> characters, parses as a <see cref="long"/>.
            /// </summary>
            /// <param name="entryName">The entry's file name (<c>ZipArchiveEntry.Name</c>).</param>
            private static bool HasNumericLastSegment(string entryName)
            {
                string lastSegment = entryName.Split('_').Last();

                // Directory entries have an empty Name and other entries may be very short;
                // Substring would throw ArgumentOutOfRangeException for them, so skip them instead.
                if (lastSegment.Length < SuffixLength)
                {
                    return false;
                }

                string candidate = lastSegment.Substring(0, lastSegment.Length - SuffixLength);
                return long.TryParse(candidate, out _);
            }

            /// <summary>
            /// Opens the entry's stream, enumerates it with
            /// <c>JsonSerializer.DeserializeAsyncEnumerable&lt;JsonNode&gt;</c>, and deserializes the
            /// "BBRSaglist" property of each element that has one into a <c>BBRSagList</c>.
            /// </summary>
            /// <param name="entry">The zip entry to read.</param>
            private async Task ParseJsonFromZippedFile(ZipArchiveEntry entry)
            {
                JsonSerializerOptions options = new JsonSerializerOptions { PropertyNamingPolicy = JsonNamingPolicy.CamelCase };
                await using Stream entryStream = entry.Open();

                IAsyncEnumerable<JsonNode?> enumerable = JsonSerializer.DeserializeAsyncEnumerable<JsonNode>(entryStream, options);
                await foreach (JsonNode? obj in enumerable)
                {
                    // Parse only subset of the object
                    // NOTE(review): the key is spelled "BBRSaglist" while the target type is BBRSagList — confirm the casing.
                    JsonNode? bbrSagNode = obj?["BBRSaglist"];
                    if (bbrSagNode is null)
                    {
                        continue;
                    }

                    // The result is not consumed yet; the call is kept so malformed data still surfaces as an exception.
                    _ = bbrSagNode.Deserialize<BBRSagList>();
                }
            }
        }
    }
    
    Login or Signup to reply.
  3. Your root JSON container is not an array, it’s an object:

    {
        "BBRSagList": [ /* Contents of BBRSagList */ ],
        "BygningList": [ /* Contents of BygningList*/ ]
    }
    

    You will not be able to use JsonSerializer.DeserializeAsyncEnumerable<T> to deserialize such JSON because this method only supports async streaming deserialization of JSON arrays, not objects. And unfortunately System.Text.Json does not support streaming deserialization of objects, or even streaming in general; it supports pipelining instead. If you need to stream through a file using System.Text.Json you will need to build on this answer by mtosh to Parsing a JSON file with .NET core 3.0/System.text.Json.

    As an alternative, you could use Json.NET which is designed for streaming via JsonTextReader. Your JSON object consists of multiple array-valued properties, and using Json.NET you will be able to stream through your entryStream asynchronously, load each array value into a JToken, then call some callback for each token.

    First, introduce the following extension methods:

    /// <summary>
    /// Json.NET helpers for streaming through a JSON object whose property values are arrays.
    /// </summary>
    public static partial class JsonExtensions
    {
        /// <summary>
        /// Asynchronously stream through a stream containing a JSON object whose properties have array values and call some callback for each value specified by property name.
        /// The content must start with an object or an exception will be thrown.
        /// </summary>
        /// <param name="stream">The stream to read. It is left open; the caller disposes it.</param>
        /// <param name="itemActions">Callbacks keyed by property name; each is invoked once per item of the matching array.</param>
        /// <param name="cancellationToken">Token passed to every asynchronous read.</param>
        public static async Task StreamJsonObjectArrayPropertyValues(Stream stream, Dictionary<string, Action<JToken>> itemActions, CancellationToken cancellationToken = default)
        {
            // StreamReader and JsonTextReader do not implement IAsyncDisposable so let the caller dispose the stream.
            using (var textReader = new StreamReader(stream, leaveOpen : true))
            using (var reader = new JsonTextReader(textReader) { CloseInput = false })
            {
                await StreamJsonObjectArrayPropertyValues(reader, itemActions, cancellationToken).ConfigureAwait(false);
            }
        }

        /// <summary>
        /// Asynchronously stream through a given JSON object whose properties have array values and call some callback for each value specified by property name.
        /// The reader must be positioned on an object or an exception will be thrown.
        /// </summary>
        /// <param name="reader">The reader to consume.</param>
        /// <param name="actions">Callbacks keyed by property name; each is invoked once per item of the matching array.</param>
        /// <param name="cancellationToken">Token passed to every asynchronous read and skip.</param>
        /// <exception cref="JsonSerializationException">The reader is not positioned on an object.</exception>
        /// <exception cref="JsonReaderException">A token other than a property name is found inside the object, or the JSON ends early.</exception>
        public static async Task StreamJsonObjectArrayPropertyValues(JsonReader reader, Dictionary<string, Action<JToken>> actions, CancellationToken cancellationToken = default)
        {
            var loadSettings = new JsonLoadSettings { LineInfoHandling = LineInfoHandling.Ignore }; // For performance do not load line info.
            (await reader.MoveToContentAndAssertAsync(cancellationToken).ConfigureAwait(false)).AssertTokenType(JsonToken.StartObject);
            while ((await reader.ReadToContentAndAssertAsync(cancellationToken).ConfigureAwait(false)).TokenType != JsonToken.EndObject)
            {
                if (reader.TokenType != JsonToken.PropertyName)
                    throw new JsonReaderException($"Unexpected token {reader.TokenType}, expected {JsonToken.PropertyName}");
                var name = (string)reader.Value!;
                // Advance from the property name to its value.
                await reader.ReadToContentAndAssertAsync(cancellationToken).ConfigureAwait(false);
                if (actions.TryGetValue(name, out var action) && reader.TokenType == JsonToken.StartArray)
                {
                    await foreach (var token in reader.LoadAsyncEnumerable(loadSettings, cancellationToken).ConfigureAwait(false))
                    {
                        action(token);
                    }
                }
                else
                {
                    // No callback registered (or the value is not an array): skip the whole value.
                    await reader.SkipAsync(cancellationToken).ConfigureAwait(false);
                }
            }
        }

        /// <summary>
        /// Asynchronously load and return JToken values from a reader positioned on a JSON array.
        /// The reader must be positioned on an array or an exception will be thrown.
        /// </summary>
        /// <param name="reader">The reader to consume.</param>
        /// <param name="settings">Optional load settings passed to <c>JToken.LoadAsync</c>.</param>
        /// <param name="cancellationToken">Token checked between items and passed to every asynchronous read.</param>
        public static async IAsyncEnumerable<JToken> LoadAsyncEnumerable(this JsonReader reader, JsonLoadSettings? settings = default, [EnumeratorCancellation] CancellationToken cancellationToken = default)
        {
            (await reader.MoveToContentAndAssertAsync(cancellationToken).ConfigureAwait(false)).AssertTokenType(JsonToken.StartArray);
            cancellationToken.ThrowIfCancellationRequested();
            while ((await reader.ReadToContentAndAssertAsync(cancellationToken).ConfigureAwait(false)).TokenType != JsonToken.EndArray)
            {
                cancellationToken.ThrowIfCancellationRequested();
                yield return await JToken.LoadAsync(reader, settings, cancellationToken).ConfigureAwait(false);
            }
            cancellationToken.ThrowIfCancellationRequested();
        }

        /// <summary>
        /// Returns <paramref name="reader"/> when its current token is <paramref name="tokenType"/>; otherwise throws.
        /// </summary>
        /// <exception cref="JsonSerializationException">The current token differs from <paramref name="tokenType"/>.</exception>
        public static JsonReader AssertTokenType(this JsonReader reader, JsonToken tokenType) =>
            reader.TokenType == tokenType ? reader : throw new JsonSerializationException(string.Format("Unexpected token {0}, expected {1}", reader.TokenType, tokenType));

        /// <summary>
        /// Reads the next token, then skips any comment tokens, throwing if the JSON ends.
        /// </summary>
        public static async Task<JsonReader> ReadToContentAndAssertAsync(this JsonReader reader, CancellationToken cancellationToken = default) =>
            await (await reader.ReadAndAssertAsync(cancellationToken).ConfigureAwait(false)).MoveToContentAndAssertAsync(cancellationToken).ConfigureAwait(false);

        /// <summary>
        /// Advances past the initial <c>None</c> state and any comment tokens, throwing if the JSON ends.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        public static async Task<JsonReader> MoveToContentAndAssertAsync(this JsonReader reader, CancellationToken cancellationToken = default)
        {
            if (reader == null)
                throw new ArgumentNullException(nameof(reader));
            if (reader.TokenType == JsonToken.None)       // Skip past beginning of stream.
                await reader.ReadAndAssertAsync(cancellationToken).ConfigureAwait(false);
            while (reader.TokenType == JsonToken.Comment) // Skip past comments.
                await reader.ReadAndAssertAsync(cancellationToken).ConfigureAwait(false);
            return reader;
        }

        /// <summary>
        /// Reads the next token and throws if there is none.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        /// <exception cref="JsonReaderException">The end of the JSON was reached.</exception>
        public static async Task<JsonReader> ReadAndAssertAsync(this JsonReader reader, CancellationToken cancellationToken = default)
        {
            if (reader == null)
                throw new ArgumentNullException(nameof(reader));
            if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
                throw new JsonReaderException("Unexpected end of JSON stream.");
            return reader;
        }
    }
    

    And now you will be able to do the following, to process the entries in the "BBRSagList" array:

    /// <summary>
    /// Opens <paramref name="entry"/> and streams its JSON object through
    /// <c>JsonExtensions.StreamJsonObjectArrayPropertyValues</c>, routing every item of the
    /// "BBRSagList" array to <c>ProcessBBRSagList</c>.
    /// </summary>
    private static async Task ParseJsonFromZippedFile(ZipArchiveEntry entry)
    {
        await using (Stream entryStream = entry.Open())
        {
            var handlersByProperty = new Dictionary<string, Action<JToken>>();
            handlersByProperty["BBRSagList"] = ProcessBBRSagList;

            await JsonExtensions.StreamJsonObjectArrayPropertyValues(entryStream, handlersByProperty);
        }
    }
    
    /// <summary>
    /// Callback for one item of the "BBRSagList" array: converts the token to a
    /// <c>BBRSagList</c> and writes it, plus its JSON form, to the console.
    /// Replace the body with whatever per-item handling is needed.
    /// </summary>
    static void ProcessBBRSagList(JToken token)
    {
        var sagItem = token.ToObject<BBRSagList>();
        string sagItemJson = JsonConvert.SerializeObject(sagItem);

        Console.WriteLine("Deserialized {0}, result = {1}", sagItem, sagItemJson);
    }
    

    Notes:

    • As observed by Fildor-standswithMods in comments, you must also declare your Main() method as public static async Task Main() and also await ParseJsonFromZippedFile(entry)

      /// <summary>
      /// Opens the zip archive and awaits ParseJsonFromZippedFile for every entry whose
      /// last '_'-separated name segment, minus its final five characters, parses as a long.
      /// </summary>
      public static async Task Main()
      {
          // NOTE(review): this path looks like it lost its directory separators — confirm.
          string zip_path = @"C:ProjectsBBRDownload_Totalexample_json.zip";
          using FileStream file = File.OpenRead(zip_path);
          using (var zip = new ZipArchive(file, ZipArchiveMode.Read))
          {
              foreach (ZipArchiveEntry entry in zip.Entries)
              {
                  string lastSegment = entry.Name.Split('_').Last();

                  // Directory entries have an empty Name and other entries may be very short;
                  // Substring would throw ArgumentOutOfRangeException for them, so skip them instead.
                  if (lastSegment.Length < 5)
                  {
                      continue;
                  }

                  // Drop the final five characters (presumably ".json" — confirm) before the numeric check.
                  string name = lastSegment.Substring(0, lastSegment.Length - 5);
                  if (long.TryParse(name, out _))
                  {
                      await ParseJsonFromZippedFile(entry);
                  }
              }
          }
      }
      

      (I made ParseJsonFromZippedFile() a static method so there is no reason to allocate a Program instance.)

    Mockup fiddle here.

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search