Prerequisites:
- Azure function app with blob trigger setup
NOTE This article will not explain how to set up azure function with blob trigger but here is one post that might help
Azure Functions - Creating a new function
In this article, we will see how to read and process the CSV file uploaded to Azure Blob Storage using Azure Functions. We will be using python as programming language.
This article will cover:
- Reading CSV from azure blob storage using python
- Adding python matplotlib plot without saving graph image locally
- Add data to excel using xlsxwriter
- Uploading BytesIO/blob to Azure
Lets get started..
1. Reading CSV from azure blob:
# Read CSV
file_content = myblob.read().decode("utf-8")
# Load csv data to dataframe
df = pd.read_csv(StringIO(file_content))
2. Add image as stream to excel file to blob:
#Image stream to add image
image_stream=BytesIO()
# Insert image stream into excel
sheet=writer.sheets[sheet_name]
sheet.insert_image('F6', 'graph.png', { 'image_data': image_stream})
3. Add data to excel using xlsxwriter :
# Create Excel
xlsx_bytes = BytesIO()
writer = pd.ExcelWriter(xlsx_bytes, engine='xlsxwriter', mode='A')
4. Uploading Blob:
# Uploading generated output excel sheet
blob_client.upload_blob(xlsx_bytes.getvalue(), blob_type="BlockBlob", overwrite=True)
Complete Code:
#Import libraries
import logging
from azure.storage.blob import BlobServiceClient
import io
from io import BytesIO, StringIO
import azure.functions as func
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xlsxwriter
# Function that creates bar chart and add to excel sheet
def add_sheet(sheet_name, cost_key_label, cost_value_label, file_content, writer):
logging.info(f'add_sheet: {sheet_name}')
# Calculate data from csv
logging.info('Calculate data from csv')
df = pd.read_csv(StringIO(file_content)) # read csv data as string
cost=df.groupby(by=cost_key_label)[cost_value_label].sum()
# Calculate data from csv
df_data=df.groupby(by=cost_key_label)[cost_value_label].sum()
df_data.to_excel(writer, sheet_name=sheet_name) #writer
# Create Image
logging.info('Create image')
image_stream=BytesIO() #image stream to add image as stream
plt.clf() # clear the plt object to avoid image overlap
# Label for x and y axis of bar chart
plt.xlabel(cost_key_label, fontsize=10)
plt.ylabel(cost_value_label, fontsize=10)
cost_keys=cost.keys().tolist()
cost_values=cost.tolist()
plt.bar(cost_keys, cost_values)
plt.xticks(fontsize=5.5)
plt.yticks(fontsize=5.5)
# save image as stream
plt.savefig(image_stream, dpi = 100)
# Insert Image into excel
logging.info('Insert image into excel')
sheet=writer.sheets[sheet_name]
sheet.insert_image('F6', 'graph.png', { 'image_data': image_stream})
# main function
def main(myblob: func.InputStream):
logging.info(f"File Name: {myblob.name}")
if not myblob.name.endswith(".csv"):
return
# Read CSV
logging.info("Read csv file")
file_content = myblob.read().decode("utf-8")
# Create Excel
logging.info('Create excel')
xlsx_bytes = BytesIO()
writer = pd.ExcelWriter(xlsx_bytes, engine='xlsxwriter', mode='A')
#Add sheet function call
add_sheet('cost_analysis', 'InvoiceSectionName', 'CostInBillingCurrency', file_content, writer)
add_sheet('subscription_cost', 'SubscriptionName', 'CostInBillingCurrency', file_content, writer)
# Closing writer object
writer.save()
# Upload excel to Azure Blob Storage
logging.info("Upload excel to Azure Blob Storage")
blob_service_client = BlobServiceClient.from_connection_string(CONNECTION_STRING)
blob_client = blob_service_client.get_blob_client(container=CONTAINER_NAME, blob=myblob.name + '.xlsx')
blob_client.upload_blob(xlsx_bytes.getvalue(), blob_type="BlockBlob", overwrite=True)
.
Hope this Helps! :)
Top comments (0)