I am trying to use Amazon Textract to extract specific information from a resume PDF file which is available in a local folder.
I am able to get the text using the code below.
Sample code:
// Sends a local PDF to Textract with the "FORMS" feature and writes the text
// of every returned block to the current HTTP response.
using (var textractClient = new AmazonTextractClient(RegionEndpoint.APSouth1))
using (var memoryStream = new MemoryStream())
{
    // Copy the PDF into memory; the inner using releases the file handle as
    // soon as the copy completes instead of leaving it open.
    using (var fileStream = new FileStream(HostingEnvironment.MapPath("~/pdfresume.pdf"), FileMode.Open, FileAccess.Read))
    {
        await fileStream.CopyToAsync(memoryStream);
    }

    var analyzeDocumentRequest = new AnalyzeDocumentRequest()
    {
        Document = new Document { Bytes = memoryStream },
        FeatureTypes = new List<string> { "FORMS" }
        //here i wish to pass many queries like "what is the email id"
    };
    var analyzeDocumentResponse = await textractClient.AnalyzeDocumentAsync(analyzeDocumentRequest);
    foreach (var block in analyzeDocumentResponse.Blocks)
    {
        HttpContext.Current.Response.Write(block.Text);
    }
}
But my purpose is to get specific data using queries,
as in PHP:
'QueriesConfig' => [
'Queries' => [
[
'Text' => 'What is name'
],
[
'Text' => 'What is email'
],
You should add "QUERIES" to the FeatureTypes, and then supply your queries through QueriesConfig.
Something like:
// Sends a local PDF to Textract with the "QUERIES" feature and writes only
// the query answers (QUERY_RESULT blocks) to the current HTTP response.
using (var textractClient = new AmazonTextractClient(RegionEndpoint.APSouth1))
using (var memoryStream = new MemoryStream())
{
    // Copy the PDF into memory; the inner using releases the file handle as
    // soon as the copy completes instead of leaving it open.
    using (var fileStream = new FileStream(HostingEnvironment.MapPath("~/pdfresume.pdf"), FileMode.Open, FileAccess.Read))
    {
        await fileStream.CopyToAsync(memoryStream);
    }

    // One Query per question; add further Query entries to this list to ask
    // several questions in the same request.
    var queries = new List<Query>
    {
        new Query
        {
            Alias = "MyAlias",
            Text = "What is my name"
        }
    };

    var analyzeDocumentRequest = new AnalyzeDocumentRequest()
    {
        Document = new Document { Bytes = memoryStream },
        FeatureTypes = new List<string> { "QUERIES" },
        // No ';' after this nested initializer: it is a member of the
        // enclosing object initializer, not a statement.
        QueriesConfig = new QueriesConfig
        {
            Queries = queries
        }
    };

    var analyzeDocumentResponse = await textractClient.AnalyzeDocumentAsync(analyzeDocumentRequest);
    foreach (var block in analyzeDocumentResponse.Blocks)
    {
        // Skip every block that is not a query answer.
        if (block.BlockType.Value == "QUERY_RESULT")
        {
            HttpContext.Current.Response.Write(block.Text);
        }
    }
}