I have a requirement to read an Excel file from the system, modify that file (e.g. remove duplicates), and drop it into another location.
I am using IFormFile.
Can anyone help me remove the duplicates from the file, update that same file, and create a new file containing the duplicated values?
Here is what I have so far to read the file:
// Collects the whole uploaded file as text, one AppendLine call per input line.
var result = new StringBuilder();
using (var reader = new StreamReader(iFormFile.OpenReadStream()))
{
    // Read until ReadLineAsync reports end of stream by returning null.
    // Peek() is not a reliable end-of-file test: it can return -1 while more
    // data is still to come (for example on streams that do not support seeking).
    string line;
    while ((line = await reader.ReadLineAsync()) != null)
    {
        result.AppendLine(line);
    }
}
/// <summary>
/// Reads <paramref name="stream"/> line by line and returns a new stream that
/// contains each distinct line once, in order of first appearance.
/// </summary>
/// <param name="stream">Text input, one record per line. It is disposed once it has been read.</param>
/// <param name="token">Checked between lines so a long read can be cancelled.</param>
/// <returns>
/// A <see cref="MemoryStream"/> positioned at the start, holding the de-duplicated
/// lines as UTF-8 text without a byte-order mark. The caller owns and disposes it.
/// </returns>
/// <exception cref="OperationCanceledException"><paramref name="token"/> was cancelled.</exception>
public async Task<Stream> ValidateDataFileAsync(Stream stream, CancellationToken token)
{
    // HashSet.Add returns false for a value it already holds, so it doubles as the
    // "seen before?" test while the lines are written out in their original order.
    var seen = new HashSet<string>();
    var output = new MemoryStream();

    using (var reader = new StreamReader(stream))
    // leaveOpen: true keeps 'output' usable after the writer is disposed.
    using (var writer = new StreamWriter(output, new System.Text.UTF8Encoding(false), 1024, leaveOpen: true))
    {
        string line;
        // ReadLineAsync returns null at end of stream; Peek() is not a reliable EOF test.
        while ((line = await reader.ReadLineAsync()) != null)
        {
            token.ThrowIfCancellationRequested();
            if (seen.Add(line))
            {
                await writer.WriteLineAsync(line);
            }
        }

        await writer.FlushAsync();
    }

    // Rewind so the caller can read (or upload) the result from the beginning.
    output.Position = 0;
    return output;
}
First, install the NuGet packages:
Install-Package WindowsAzure.Storage -Version 9.3.3
and
Install-Package EPPlus -Version 4.5.2.1
Create an interface, IBlobManager.cs, which will house all your blob operations:
using System;
using System.IO;
using System.Threading.Tasks;
namespace UploadAzureBlob.Services
{
    /// <summary>
    /// Contract for the blob storage operations used by the application.
    /// </summary>
    public interface IBlobManager
    {
        /// <summary>
        /// Uploads the content of <paramref name="stream"/> to blob storage
        /// under the name <paramref name="fileName"/>.
        /// </summary>
        /// <param name="fileName">Name to give the uploaded blob.</param>
        /// <param name="stream">Content to upload.</param>
        /// <returns>
        /// A string identifying the uploaded resource; the BlobManager
        /// implementation returns the blob's URI.
        /// </returns>
        Task<string> UploadFileToBlobAsync(string fileName, Stream stream);
    }
}
Implement the above interface in a class, BlobManager.cs:
using Microsoft.WindowsAzure.Storage;
using Microsoft.WindowsAzure.Storage.Blob;
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Threading.Tasks;
namespace UploadAzureBlob.Services
{
    /// <summary>
    /// <see cref="IBlobManager"/> implementation that uploads files to an Azure Blob storage container.
    /// </summary>
    public class BlobManager : IBlobManager
    {
        // Name of the container every upload is written to.
        private const string ContainerName = "test";

        private readonly CloudBlobClient cloudBlobClient;

        /// <summary>
        /// Parses the storage connection string and creates the blob client.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// The connection string is empty or cannot be parsed.
        /// </exception>
        public BlobManager()
        {
            // Retrieve the connection string for blob storage
            string storageConnectionString = "";

            // TryParse signals failure through its return value instead of throwing,
            // so the result must be checked before the account is used. Failing here
            // gives a clear message rather than a NullReferenceException further down.
            if (!CloudStorageAccount.TryParse(storageConnectionString, out CloudStorageAccount storageAccount))
            {
                throw new InvalidOperationException(
                    "The blob storage connection string is missing or invalid.");
            }

            // Create the CloudBlobClient that represents the Blob storage endpoint for the storage account.
            cloudBlobClient = storageAccount.CreateCloudBlobClient();
        }

        /// <summary>
        /// Uploads a file to blob storage.
        /// </summary>
        /// <param name="fileName">Name of the blob to create or overwrite inside the container.</param>
        /// <param name="stream">Content to upload; it is read from its current position.</param>
        /// <returns>The URI of the uploaded blob.</returns>
        /// <exception cref="ArgumentException"><paramref name="fileName"/> is null, empty or whitespace.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        /// <exception cref="StorageException">The storage service rejected one of the requests.</exception>
        public async Task<string> UploadFileToBlobAsync(string fileName, Stream stream)
        {
            // Validate before touching the service so a bad call has no side effects.
            if (string.IsNullOrWhiteSpace(fileName))
            {
                throw new ArgumentException("A blob name is required.", nameof(fileName));
            }

            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }

            // Create the container only when it is missing. A plain CreateAsync fails
            // once the container exists, which would break every upload after the first.
            CloudBlobContainer blobContainer = cloudBlobClient.GetContainerReference(ContainerName);
            await blobContainer.CreateIfNotExistsAsync();

            // NOTE(review): this opens the container to anonymous access so the
            // returned URI can be fetched directly - confirm that is intended for your data.
            BlobContainerPermissions perm = await blobContainer.GetPermissionsAsync();
            perm.PublicAccess = BlobContainerPublicAccessType.Container;
            await blobContainer.SetPermissionsAsync(perm);

            // Get a reference to the blob address, then upload the file to the blob.
            var cloudBlockBlob = blobContainer.GetBlockBlobReference(fileName);
            await cloudBlockBlob.UploadFromStreamAsync(stream);

            // Returning the URI of the freshly created resource
            return cloudBlockBlob.Uri.ToString();
        }
    }
}
Finally, in your controller:
/// <summary>
/// Reads the uploaded file line by line, keeps the lines that occur more than once,
/// writes those lines to a new Excel workbook and uploads the workbook to blob storage.
/// </summary>
/// <param name="formFile">
/// The uploaded file. It is read as text, one record per line
/// (NOTE(review): this assumes a line-oriented format such as CSV - confirm).
/// </param>
/// <returns>The URI string returned by the blob manager for the uploaded workbook.</returns>
public async Task<string> ValidateDataFileAsync(IFormFile formFile)
{
    var lines = new List<string>();
    using (var reader = new StreamReader(formFile.OpenReadStream()))
    {
        // Await the read instead of blocking on .Result, and stop when
        // ReadLineAsync returns null; Peek() is not a reliable end-of-file test.
        string line;
        while ((line = await reader.ReadLineAsync()) != null)
        {
            lines.Add(line);
        }
    }

    // Filter by repeated items: keep one entry per value that appears at least twice.
    List<string> duplicates = lines
        .GroupBy(x => x)
        .Where(g => g.Skip(1).Any())
        .Select(g => g.Key)
        .ToList();

    // Write the List<string> into a workbook using the EPPlus package.
    byte[] workbookBytes;
    using (var package = new ExcelPackage())
    {
        var worksheet = package.Workbook.Worksheets.Add("Worksheet 1");
        worksheet.Cells["A1"].LoadFromCollection(duplicates);
        workbookBytes = package.GetAsByteArray();
    }

    // Single stream over the finished workbook, disposed once the upload completes.
    using (var memoryStream = new MemoryStream(workbookBytes))
    {
        IBlobManager blobManager = new BlobManager();
        return await blobManager.UploadFileToBlobAsync(formFile.FileName, memoryStream);
    }
}