I am using `HttpClient` (System.Net.Http, Version=4.2.1.0) to POST an HTTP request with multipart form data to a web API. The form data includes a string parameter (`benchmark`) and a file (`addressFile`) whose contents are held in `stream`. The API call returns a CSV file which I want to save to disk.
The response contains the header `Transfer-Encoding: chunked`, and the data contained in `responseBytes` includes the chunk headers. I would expect the HttpClient library to strip out these headers, which are metadata for the actual content. Instead, it simply includes the header rows in the `Content`.
What is the correct way to handle these chunk headers?
I could of course write a method to handle the headers myself, but I find it hard to believe that the HttpClient library doesn't already have this functionality baked in somewhere.
// Posts the benchmark name and the address file as multipart form data, hands the raw
// response bytes to saveResponse, and returns the items projected from the parsed response.
// The request content and the response are disposed once the body has been read into memory.
// NOTE(review): a new HttpClient per call can exhaust sockets under load; consider a shared instance.
using (var client = new HttpClient())
using (var content = new MultipartFormDataContent())
{
    content.Add(new StringContent("Public_AR_Current"), "benchmark");
    content.Add(new ByteArrayContent(stream.ToArray()), "addressFile", "addressFile.csv");

    using (var response = await client.PostAsync("https://geocoding.geo.census.gov/geocoder/locations/addressbatch", content))
    {
        // The body is copied into a byte array, so nothing below depends on the response staying open.
        var responseBytes = await response.Content.ReadAsByteArrayAsync();
        saveResponse(responseBytes);

        var geocodedItems = ParseGeocodeResponse(responseBytes);
        var parsedItems = geocodedItems.Select(gi => gi.ToEpaHandlerUsCensusGeocode());
        return parsedItems;
    }
}
Note the chunk headers on the first and subsequent lines (`0fe8`, `0060`, `0fe8`).
0fe8
0fe8
"AK0000036228","500 HOLLYWOOD DR, ANCHORAGE, AK, 99501","Match","Exact","500 HOLLYWOOD DR, ANCHORAGE, AK, 99501","-149.87424,61.23034","190797469","R"
"AK0000363994","3155 E 18TH CIR, ANCHORAGE, AK, 99508","Match","Non_Exact","3155 E 18TH CIR, ANCHORAGE, AK, 99508","-149.82193,61.20462","190799569","L"
...
0060
28712","N 65 DEG 35 15 W 167 DEG 55 18, WALES, AK, 99734","No_Match"
"AK0000112227","KODIAK ARPR
...
0fe8
T AREA, KODIAK, AK, 99615","No_Match"
"AK0000033902","2130 E DIMOND BLVD, ANCHORAGE, AK, 99515","Match","Non_Exact","2130 W DIMOND BLVD, ANCHORAGE, AK, 99515","-149.91881,61.1375","190795925","L"
"AK0000562769","3100 TONGASS AVE, KETCHIKAN, AK, 99901-5746","No_Match"
I would expect the HttpClient library to strip out these headers, producing output like this:
"AK0000036228","500 HOLLYWOOD DR, ANCHORAGE, AK, 99501","Match","Exact","500 HOLLYWOOD DR, ANCHORAGE, AK, 99501","-149.87424,61.23034","190797469","R"
"AK0000363994","3155 E 18TH CIR, ANCHORAGE, AK, 99508","Match","Non_Exact","3155 E 18TH CIR, ANCHORAGE, AK, 99508","-149.82193,61.20462","190799569","L"
"AK0000228718","1050 ASPEN ST, FAIRBANKS, AK, 99709-5501","Match","Exact","1050 ASPEN ST, FAIRBANKS, AK, 99709","-147.7731,64.8535","605310042","L"
"AK0000536714","SMITH COVE IN SMITH LAGOON T74S R86E CRM S17 & 20, KASAAN, AK, 99901","No_Match"
"AK0001413822","USS-12403, N BANK WOOD RIVER, ALEKNAGIK, AK, 99555","No_Match"
"AK0000489567","BREAKWATER BTWN WESTERN AVE & TAIT ST, METLAKATLA, AK, 99926","No_Match"
I ended up writing this extension method which performs sufficiently well for my use case.
/// <summary>
/// Reads the content as a stream, optionally removing HTTP chunked-transfer framing
/// (hex size lines and the CRLF after each chunk) that is still present in the body.
/// </summary>
/// <param name="content">The HTTP content to read.</param>
/// <param name="isChunked">
/// <c>true</c> if the body still contains chunk size lines that must be stripped;
/// <c>false</c> to return the content stream unchanged.
/// </param>
/// <returns>
/// The content stream itself when <paramref name="isChunked"/> is <c>false</c>; otherwise a
/// <see cref="MemoryStream"/> holding the de-chunked bytes, positioned at the start.
/// </returns>
/// <exception cref="System.FormatException">A chunk size line is not a valid hexadecimal number.</exception>
/// <exception cref="EndOfStreamException">A chunk declares more bytes than remain in the body.</exception>
public static async Task<Stream> ReadAsStreamAsync(this HttpContent content, bool isChunked)
{
    // The parameterless call binds to HttpContent's own instance method, not to this extension.
    var source = await content.ReadAsStreamAsync().ConfigureAwait(false);
    if (!isChunked)
    {
        return source;
    }

    // Buffer the raw body. The source stream is deliberately not disposed here because
    // it belongs to the HttpContent.
    var raw = new MemoryStream();
    await source.CopyToAsync(raw).ConfigureAwait(false);

    return DecodeChunkedBody(raw.GetBuffer(), (int)raw.Length);
}

// Strips chunk framing from the first `length` bytes of `raw` and returns the payload, rewound to position 0.
private static MemoryStream DecodeChunkedBody(byte[] raw, int length)
{
    var decoded = new MemoryStream(length);
    var position = 0;

    while (position < length)
    {
        var sizeLine = ReadAsciiLine(raw, length, ref position);

        // Chunk extensions (";name=value") may follow the size; they carry no payload.
        var extensionStart = sizeLine.IndexOf(';');
        if (extensionStart >= 0)
        {
            sizeLine = sizeLine.Substring(0, extensionStart);
        }

        sizeLine = sizeLine.Trim();
        if (sizeLine.Length == 0)
        {
            // Tolerate a stray blank line between chunks.
            continue;
        }

        int chunkSize;
        var parsed = int.TryParse(
            sizeLine,
            System.Globalization.NumberStyles.AllowHexSpecifier,
            System.Globalization.CultureInfo.InvariantCulture,
            out chunkSize);

        // Hex parsing yields negative values for sizes with the high bit set; reject those too.
        if (!parsed || chunkSize < 0)
        {
            throw new System.FormatException("Invalid chunk size line: '" + sizeLine + "'.");
        }

        if (chunkSize == 0)
        {
            // Terminal chunk: anything after it is trailer headers, not payload.
            break;
        }

        if (chunkSize > length - position)
        {
            throw new EndOfStreamException("Chunk declares " + chunkSize + " bytes but the body ends early.");
        }

        // Chunk sizes count bytes, so the payload is copied as bytes rather than decoded characters.
        decoded.Write(raw, position, chunkSize);
        position += chunkSize;

        // Skip the CRLF that terminates the chunk data.
        if (position < length && raw[position] == (byte)'\r')
        {
            position++;
        }

        if (position < length && raw[position] == (byte)'\n')
        {
            position++;
        }
    }

    decoded.Position = 0;
    return decoded;
}

// Reads one line (terminated by LF or CRLF, or by the end of the data) as ASCII and advances `position` past it.
private static string ReadAsciiLine(byte[] raw, int length, ref int position)
{
    var start = position;
    while (position < length && raw[position] != (byte)'\n')
    {
        position++;
    }

    var end = position;
    if (position < length)
    {
        // Consume the LF itself.
        position++;
    }

    if (end > start && raw[end - 1] == (byte)'\r')
    {
        end--;
    }

    return System.Text.Encoding.ASCII.GetString(raw, start, end - start);
}