I'm developing a web crawler that will download a PDF file from a website.
I checked the website's source code and discovered that the button which downloads the PDF is actually a submit input of a form. Submitting that form returns the file, as indicated by the Content-Disposition header in the response.
Here's a picture of it:
My question is: how can I get this file using a web request (or HTML Agility Pack)? I tried it this way, but the header returns null.
// Send the request, then look up the Content-Disposition header on the response.
var response = (HttpWebResponse)req.GetResponse();
WebHeaderCollection headers = response.Headers;
string file = headers["Content-Disposition"];
Thanks in advance
I already have my answer; here is what I did to get the file:
// Issue the request and buffer the whole response body into memory.
byte[] retorno;
response = (HttpWebResponse)request.GetResponse();
try
{
    stream = response.GetResponseStream();
    try
    {
        retorno = ReadToEnd(stream);
    }
    finally
    {
        // Close the body stream even if reading fails part-way through.
        stream.Close();
    }
}
finally
{
    // Always close the response, even when reading throws, so it is not leaked.
    response.Close();
}
/// <summary>
/// Reads every byte of <paramref name="stream"/> into a new array.
/// </summary>
/// <remarks>
/// A seekable stream is rewound and read from its beginning, and its original
/// position is restored before returning (also when reading throws). A
/// non-seekable stream is read from wherever it currently is until it ends.
/// </remarks>
/// <param name="stream">The stream to read; this method does not close it.</param>
/// <returns>The bytes read; an empty array when there is nothing to read.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="stream"/> is null.</exception>
public static byte[] ReadToEnd(System.IO.Stream stream)
{
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    long originalPosition = 0;
    if (stream.CanSeek)
    {
        originalPosition = stream.Position;
        stream.Position = 0;
    }

    try
    {
        // MemoryStream grows its own backing store, so no manual buffer
        // doubling or end-of-stream probing with ReadByte is needed.
        using (System.IO.MemoryStream collected = new System.IO.MemoryStream())
        {
            byte[] chunk = new byte[4096];
            int bytesRead;
            // Read may return fewer bytes than requested; only 0 (or less) means end of stream.
            while ((bytesRead = stream.Read(chunk, 0, chunk.Length)) > 0)
            {
                collected.Write(chunk, 0, bytesRead);
            }

            return collected.ToArray();
        }
    }
    finally
    {
        // Leave a seekable stream positioned where the caller had it.
        if (stream.CanSeek)
        {
            stream.Position = originalPosition;
        }
    }
}
thanks