wham12 wham12 - 7 months ago 56
JSON Question

JSON.net serialize directly from oledbconnection

I currently have a handler which takes the filepath and tabname for an excel file, processes the file into a datatable and then serializes the table into a json string to return.
This is working until I try to process large file, and then I get an out of memory exception.

I was thinking that it would reduce the memory usage if I didn't load everything into the datatable first, and instead loaded straight into the json string. However, I have been unable to find any examples of how to do this.

Can I serialize directly from the OleDbConnection into a string? How?

public void ProcessRequest(HttpContext context)
{
string path = context.Request["path"];
string tableNames = context.Request["tableNames"];

string connectionString = string.Empty;
if (path.EndsWith(".xls"))
{
connectionString = String.Format(@"Provider=Microsoft.ACE.OLEDB.12.0;
Data Source={0};
Extended Properties=""Excel 8.0;HDR=YES;IMEX=1""", path);
}
else if (path.EndsWith(".xlsx"))
{
connectionString = String.Format(@"Provider=Microsoft.ACE.OLEDB.12.0;
Data Source={0};
Extended Properties=""Excel 12.0 Xml;HDR=YES;IMEX=1""", path);
}
DbProviderFactory factory = DbProviderFactories.GetFactory("System.Data.OleDb");

DbDataAdapter adapter = factory.CreateDataAdapter();
OleDbConnection conn = new OleDbConnection(connectionString);
conn.Open();

DataTable tmp = new DataTable();

DbCommand selectCommand = factory.CreateCommand();

selectCommand.CommandText = String.Format("SELECT * FROM [{0}]", tableNames);
selectCommand.Connection = conn;
adapter.SelectCommand = selectCommand;


adapter.Fill(tmp);
string tabdata = JsonConvert.SerializeObject(tmp);
context.Response.Write(tabdata);
}

dbc dbc
Answer

Firstly, you should stop serializing to an intermediate string and instead serialize directly to the HttpResponse.OutputStream, using the following simple methods:

public static class JsonExtensions
{
    public static void SerializeToStream(object value, System.Web.HttpResponse response, JsonSerializerSettings settings = null)
    {
        if (response == null)
            throw new ArgumentNullException("response");
        SerializeToStream(value, response.OutputStream, settings);
    }

    public static void SerializeToStream(object value, TextWriter writer, JsonSerializerSettings settings = null)
    {
        if (writer == null)
            throw new ArgumentNullException("writer");
        var serializer = JsonSerializer.CreateDefault(settings);
        serializer.Serialize(writer, value);
    }

    public static void SerializeToStream(object value, Stream stream, JsonSerializerSettings settings = null)
    {
        if (stream == null)
            throw new ArgumentNullException("stream");
        using (var writer = new StreamWriter(stream))
        {
            SerializeToStream(value, writer, settings);
        }
    }
}

Since a large string requires a large contiguous block of memory for the underlying char array, that's where you are going to run out of memory first. See also Json.NET's Performance Tips

To minimize memory usage and the number of objects allocated, Json.NET supports serializing and deserializing directly to a stream. Reading or writing JSON a piece at a time, instead of having the entire JSON string loaded into memory, is especially important when working with JSON documents greater than 85kb in size to avoid the JSON string ending up in the large object heap.

Next, be sure to wrap all your disposables in a using statement, as is shown below.

That might solve your problem, but if it doesn't, you can serialize an IDataReader to JSON using the following JsonConverter:

public class DataReaderConverter : JsonConverter
{
    public override bool CanConvert(Type objectType)
    {
        return typeof(IDataReader).IsAssignableFrom(objectType);
    }

    public override bool CanRead { get { return false; } }

    public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
    {
        throw new NotImplementedException();
    }

    public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer)
    {
        var reader = (IDataReader)value;
        writer.WriteStartArray();
        while (reader.Read())
        {
            writer.WriteStartObject();
            for (int i = 0; i < reader.FieldCount; i++)
            {
                writer.WritePropertyName(reader.GetName(i));
                serializer.Serialize(writer, reader[i]);
            }
            writer.WriteEndObject();
        }
        writer.WriteEndArray();
    }
}

And then serialize to stream as follows:

public static class ExcelExtensions
{
    private static string GetExcelConnectionString(string path)
    {
        string connectionString = string.Empty;
        if (path.EndsWith(".xls"))
        {
            connectionString = String.Format(@"Provider=Microsoft.ACE.OLEDB.12.0;
            Data Source={0};
            Extended Properties=""Excel 8.0;HDR=YES;IMEX=1""", path);
        }
        else if (path.EndsWith(".xlsx"))
        {
            connectionString = String.Format(@"Provider=Microsoft.ACE.OLEDB.12.0;
            Data Source={0};
            Extended Properties=""Excel 12.0 Xml;HDR=YES;IMEX=1""", path);
        }
        return connectionString;
    }

    public static string SerializeJsonToString(string path, string workSheetName, JsonSerializerSettings settings = null)
    {
        using (var writer = new StringWriter())
        {
            SerializeJsonToStream(path, workSheetName, writer, settings);
            return writer.ToString();
        }
    }

    public static void SerializeJsonToStream(string path, string workSheetName, Stream stream, JsonSerializerSettings settings = null)
    {
        using (var writer = new StreamWriter(stream))
            SerializeJsonToStream(path, workSheetName, writer, settings);
    }

    public static void SerializeJsonToStream(string path, string workSheetName, TextWriter writer, JsonSerializerSettings settings = null)
    {
        settings = settings ?? new JsonSerializerSettings();
        var converter = new DataReaderConverter();
        settings.Converters.Add(converter);
        try
        {
            string connectionString = GetExcelConnectionString(path);
            DbProviderFactory factory = DbProviderFactories.GetFactory("System.Data.OleDb");

            using (OleDbConnection conn = new OleDbConnection(connectionString))
            {
                conn.Open();
                using (DbCommand selectCommand = factory.CreateCommand())
                {
                    selectCommand.CommandText = String.Format("SELECT * FROM [{0}]", workSheetName);
                    selectCommand.Connection = conn;

                    using (var reader = selectCommand.ExecuteReader())
                    {
                        JsonExtensions.SerializeToStream(reader, writer, settings);
                    }
                }
            }
        }
        finally
        {
            settings.Converters.Remove(converter);
        }
    }
}

Note - lightly tested. Be sure to unit-test this against your existing method before putting it into production! For the converter code I used JSON Serialization of a DataReader as an inspiration.

Update

My converter emits JSON in the same structure as the DataTableConverter of Json.NET. Thus you're going to be able to deserialize to a DataTable automatically using Json.NET. If you prefer a more compact format, you could define your own, for instance:

{
  "columns": [
    "Name 1",
    "Name 2"
  ],
  "rows": [
    [
      "value 11",
      "value 12"
    ],
    [
      "value 21",
      "value 22"
    ]
  ]
}

And them create the following converter:

public class DataReaderArrayConverter : JsonConverter
{
    public override bool CanConvert(Type objectType)
    {
        return typeof(IDataReader).IsAssignableFrom(objectType);
    }

    public override bool CanRead { get { return false; } }

    public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
    {
        throw new NotImplementedException();
    }

    static string[] GetFieldNames(IDataReader reader)
    {
        var fieldNames = new string[reader.FieldCount];
        for (int i = 0; i < reader.FieldCount; i++)
            fieldNames[i] = reader.GetName(i);
        return fieldNames;
    }

    static void ValidateFieldNames(IDataReader reader, string[] fieldNames)
    {
        if (reader.FieldCount != fieldNames.Length)
            throw new InvalidOperationException("Unequal record lengths");
        for (int i = 0; i < reader.FieldCount; i++)
            if (fieldNames[i] != reader.GetName(i))
                throw new InvalidOperationException(string.Format("Field names at index {0} differ: \"{1}\" vs \"{2}\"", i, fieldNames[i], reader.GetName(i)));
    }

    const string columnsName = "columns";
    const string rowsName = "rows";

    public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer)
    {
        var reader = (IDataReader)value;
        writer.WriteStartObject();
        string[] fieldNames = null;
        while (reader.Read())
        {
            if (fieldNames == null)
            {
                writer.WritePropertyName(columnsName);
                fieldNames = GetFieldNames(reader);
                serializer.Serialize(writer, fieldNames);
                writer.WritePropertyName(rowsName);
                writer.WriteStartArray();
            }
            else
            {
                ValidateFieldNames(reader, fieldNames);
            }

            writer.WriteStartArray();
            for (int i = 0; i < reader.FieldCount; i++)
            {
                serializer.Serialize(writer, reader[i]);
            }
            writer.WriteEndArray();
        }
        if (fieldNames != null)
        {
            writer.WriteEndArray();
        }
        writer.WriteEndObject();
    }
}

Of course, you'll need to create your own deserialization converter on the client side.

Alternatively, you could consider compressing your response. I've never tried it, but see HttpWebRequest and GZip Http Responses and ASP.NET GZip Encoding Caveats.