Removing next line characters from Excel files

Nandan Hegde 30,951 Reputation points MVP
2020-09-08T12:56:18.627+00:00

Hello All,
I am pretty new to Azure functions.
So wanted to understand would it be possible to replace all new line characters within an Excel file uploaded in a blob location with a specific character lie '@#@#'

If so, is there any sample logic which I can refer to , to make it as dynamic as possible by parameterising the sheetname etc.

Azure Functions
Azure Functions
An Azure service that provides an event-driven serverless compute platform.
4,605 questions
{count} votes

Accepted answer
  1. JayaC-MSFT 5,526 Reputation points
    2020-09-09T12:20:50.19+00:00

    @Nandan Hegde I have tried with C# http trigger function. Sharing the code snippet. for excel operations I have used OpenXML SDK. Also for the replace operation I have used C# only, you may use regex expressions to reduce line of codes.

    using DocumentFormat.OpenXml.Packaging;  
    using DocumentFormat.OpenXml.Spreadsheet;  
    ------------  
     public static class Function1  
        {  
            [FunctionName("Function1")]  
            public static async Task<IActionResult> Run(  
                [HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = null)] HttpRequest req,  
                ILogger log)  
            {  
                log.LogInformation("C# HTTP trigger function processed a request.");  
      
                try  
                {  
                    string StorageConnection = "<your storage connection>"; //read this from configuration  
                    if (CloudStorageAccount.TryParse(StorageConnection, out CloudStorageAccount storageAccount))  
                    {  
                        CloudBlobClient BlobClient = storageAccount.CreateCloudBlobClient();  
                        CloudBlobContainer container = BlobClient.GetContainerReference("<container name>");  
      
                        if (await container.ExistsAsync())  
                        {  
                            CloudBlob file = container.GetBlobReference("Blobtest.xlsx"); //replace the filename  
      
                            if (await file.ExistsAsync())  
                            {  
                                using (MemoryStream ms = new MemoryStream())  
                                {  
                                      await file.DownloadToStreamAsync(ms);  
                                      Stream blobStream = file.OpenReadAsync().Result;  
                                  
                                    using (SpreadsheetDocument workbook = SpreadsheetDocument.Open(ms, true, new OpenSettings { AutoSave = true }))  
                                    {  
    
                                        SharedStringTablePart sharedStringsPart = workbook.WorkbookPart.SharedStringTablePart;  
                                        IEnumerable<Text> sharedStringTextElements = sharedStringsPart.SharedStringTable.Descendants<Text>();  
                                        DoReplace(sharedStringTextElements);  
      
                                        IEnumerable<WorksheetPart> worksheetParts = workbook.GetPartsOfType<WorksheetPart>();  
                                        foreach (var worksheet in worksheetParts)  
                                        {  
                                            var allTextElements = worksheet.Worksheet.Descendants<Text>();  
                                            DoReplace(allTextElements);  
                                        }  
      
                                    }  
      
                                    ms.Seek(0L, SeekOrigin.Begin);  
                                          
      
                                    var cloudBlockBlob = container.GetBlockBlobReference("Blobtest_Updated1.xlsx");  
                                    if (await cloudBlockBlob.ExistsAsync() == false)  
                                    {  
                                        ms.Position = 0;  
                                        using (var uploadStream = ms)  
                                        {  
                                            await cloudBlockBlob.UploadFromStreamAsync(uploadStream);  
                                        }  
                                        
      
                                    }  
                                }  
                              
                        }  
                    }  
                }  
               }  
                catch (Exception e)  
                {  
                    throw e;  
                }  
                string name = req.Query["name"];  
      
                string requestBody = await new StreamReader(req.Body).ReadToEndAsync();  
                dynamic data = JsonConvert.DeserializeObject(requestBody);  
                name = name ?? data?.name;  
      
                string responseMessage = string.IsNullOrEmpty(name)  
                    ? "This HTTP triggered function executed successfully. Pass a name in the query string or in the request body for a personalized response."  
                    : $"Hello, {name}. This HTTP triggered function executed successfully.";  
      
                return new OkObjectResult(responseMessage);  
            }  
      
            private static void DoReplace(IEnumerable<Text> textElements)  
            {  
               string[] chars = new string[] { ",", ".", "/", "!", "@", "#", "$", "%", "^", "&", "*", "'", "\"", ";", "_", "(", ")", ":", "|", "[", "]" };  
                foreach (var text in textElements)  
                {  
                      
                    for(int i = 0; i < chars.Length; i++)  
                    {  
                        if(text.Text.Contains(chars[i]))  
                        {  
                            text.Text = text.Text.Replace(chars[i], "");  
                        }  
                    }  
                }  
            }  
        }  
    

    You can modify the code according to your requirement. You can directly get the sheet data as well:

     WorkbookPart workbookPart = myWorkbook.WorkbookPart;  
     WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();  
    SheetData sheetData = worksheetPart.Worksheet.GetFirstChild<SheetData>();  
    

    If you think this answer helped , please "Accept the answer" and "Up-vote" so that it can help others in the community.

    0 comments No comments

1 additional answer

Sort by: Most helpful
  1. JayaC-MSFT 5,526 Reputation points
    2020-09-22T13:01:24.197+00:00

    @Priya Jha

    I am sharing the full code here, let me if it helps:

    using System;  
    using System.IO;  
    using System.Threading.Tasks;  
    using Microsoft.AspNetCore.Mvc;  
    using Microsoft.Azure.WebJobs;  
    using Microsoft.Azure.WebJobs.Extensions.Http;  
    using Microsoft.AspNetCore.Http;  
    using Microsoft.Extensions.Logging;  
    using Newtonsoft.Json;  
    using Microsoft.WindowsAzure.Storage;  
    using Microsoft.WindowsAzure.Storage.Blob;  
    using DocumentFormat.OpenXml.Packaging;  
    using DocumentFormat.OpenXml.Spreadsheet;  
    using System.Linq;  
    using System.Collections.Generic;  
    using System.Text.RegularExpressions;  
      
    namespace ReplaceinExcel  
    {  
        public static class Function1  
        {  
            [FunctionName("Function1")]  
            public static async Task<IActionResult> Run(  
                [HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = null)] HttpRequest req,  
                ILogger log)  
            {  
                log.LogInformation("C# HTTP trigger function processed a request.");  
      
                try  
                {  
                    string StorageConnection = "<your storage connection string>";  
                    if (CloudStorageAccount.TryParse(StorageConnection, out CloudStorageAccount storageAccount))  
                    {  
                        CloudBlobClient BlobClient = storageAccount.CreateCloudBlobClient();  
                        CloudBlobContainer container = BlobClient.GetContainerReference("<container name>");  
      
                        if (await container.ExistsAsync())  
                        {  
                            CloudBlob file = container.GetBlobReference("Blobtest.xlsx");  
      
                            if (await file.ExistsAsync())  
                            {  
                                using (MemoryStream ms = new MemoryStream())  
                                {  
                                      await file.DownloadToStreamAsync(ms);  
                                      Stream blobStream = file.OpenReadAsync().Result;  
                                  
                                    using (SpreadsheetDocument workbook = SpreadsheetDocument.Open(ms, true, new OpenSettings { AutoSave = true }))  
                                    {  
      
                                        //WorkbookPart workbookPart = workbook.WorkbookPart;  
                                        //WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();  
                                        //SheetData sheetData = worksheetPart.Worksheet.GetFirstChild<SheetData>();  
      
                                        SharedStringTablePart sharedStringsPart = workbook.WorkbookPart.SharedStringTablePart;  
                                        IEnumerable<Text> sharedStringTextElements = sharedStringsPart.SharedStringTable.Descendants<Text>();  
                                        DoReplace(sharedStringTextElements);  
      
                                        IEnumerable<WorksheetPart> worksheetParts = workbook.GetPartsOfType<WorksheetPart>();  
                                        foreach (var worksheet in worksheetParts)  
                                        {  
                                            var allTextElements = worksheet.Worksheet.Descendants<Text>();  
                                            DoReplace(allTextElements);  
                                        }  
      
                                    }  
      
                                    ms.Seek(0L, SeekOrigin.Begin);  
                                          
      
                                    var cloudBlockBlob = container.GetBlockBlobReference("Blobtest_Updated1.xlsx");  
                                    if (await cloudBlockBlob.ExistsAsync() == false)  
                                    {  
                                        ms.Position = 0;  
                                        using (var uploadStream = ms)  
                                        {  
                                            await cloudBlockBlob.UploadFromStreamAsync(uploadStream);  
                                        }  
                                        
      
                                    }  
                                }  
                              
                        }  
                    }  
                }  
               }  
                catch (Exception e)  
                {  
                    throw e;  
                }  
                string name = req.Query["name"];  
      
                string requestBody = await new StreamReader(req.Body).ReadToEndAsync();  
                dynamic data = JsonConvert.DeserializeObject(requestBody);  
                name = name ?? data?.name;  
      
                string responseMessage = string.IsNullOrEmpty(name)  
                    ? "This HTTP triggered function executed successfully. Pass a name in the query string or in the request body for a personalized response."  
                    : $"Hello, {name}. This HTTP triggered function executed successfully.";  
      
                return new OkObjectResult(responseMessage);  
            }  
      
            private static void DoReplace(IEnumerable<Text> textElements)  
            {  
               string[] chars = new string[] { ",", ".", "/", "!", "@", "#", "$", "%", "^", "&", "*", "'", "\"", ";", "_", "(", ")", ":", "|", "[", "]" };  
                foreach (var text in textElements)  
                {  
                      
                    for(int i = 0; i < chars.Length; i++)  
                    {  
                        if(text.Text.Contains(chars[i]))  
                        {  
                            text.Text = text.Text.Replace(chars[i], "");  
                        }  
                    }  
                }  
            }  
        }  
      
          
    }  
    

    Yes, in here it is checking the cell values. Please refer to the "DoReplace" function where I have defined all the special characters. However, as I mentioned before, you can modify the "DoReplace" function based on your requirement. You may use some regex expression as well.

    You can also play around with the sheetdata and not the shared table part:

    //WorkbookPart workbookPart = workbook.WorkbookPart;  
    //WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();  
    //SheetData sheetData = worksheetPart.Worksheet.GetFirstChild<SheetData>();  
    

    If you think this answer helped , please "Accept the answer" and "Up-vote" so that it can help others in the community.