Hi Team,
I am very new to pytest. Could anyone help me write unit tests with pytest for the code below?
import os
import numpy as np
import pandas as pd
import re
from core.enums.filetype import FileType
from core.interfaces.trainingdataprovider import TrainingDataProvider
from tdm.constants import SqlStoreProcedures
from tdm.utils.textprocessing import TextProcessing
class LooDataProvider(TrainingDataProvider):
    """Provide a processed training DataFrame, using on-disk caches when present.

    Lookup order, as implemented below:

    1. a pickle named after ``training_config.EXPERIMENT_NAME`` under
       ``PROCESSED_File``;
    2. the raw CSV at ``RAW_OUTPUT_CSV_FILENAME``;
    3. a ``SELECT`` issued through ``self.data_access_layer``.

    NOTE(review): ``self.logger``, ``self.training_config``,
    ``self.data_access_layer`` and ``self.store_data`` are not defined in this
    class; presumably they come from ``TrainingDataProvider`` -- confirm.
    """

    # Directory prefix for processed artefacts. It is treated as read-only:
    # the per-experiment file path is derived from it on every call.
    PROCESSED_File: str = './data/processed/'
    # Location of the cached, unprocessed CSV.
    RAW_OUTPUT_CSV_FILENAME: str = './data/processed/raw.csv'

    def obtain_data(self) -> pd.DataFrame:
        """Return the processed data, building and caching it if necessary.

        The result is also stored on ``self.data``.

        Returns:
            The DataFrame read from the experiment pickle when that file
            exists, otherwise the freshly processed DataFrame.
        """
        data_filename: str = f'{self.PROCESSED_File}{self.training_config.EXPERIMENT_NAME}.pkl'
        # NOTE(review): unpickling executes arbitrary code; only safe while
        # the processed directory is written exclusively by trusted code.
        self.data = pd.read_pickle(data_filename) if os.path.exists(data_filename) else None
        # The resolved path is handed to the loader instead of being written
        # back to ``self.PROCESSED_File``: overwriting the prefix made a second
        # call on the same instance build '<dir><exp>.pkl<exp>.pkl'.
        if self.data is None:
            self.logger.debug('Processed ')
            self.data = self.__load_union_data(data_filename)
        return self.data

    def __load_union_data(self, processed_file: str = '') -> pd.DataFrame:
        """Load raw data, run text processing, and persist the result.

        Args:
            processed_file: Path of the pickle to write. When empty,
                ``self.PROCESSED_File`` is used, which matches what a call
                without arguments did before this parameter existed.

        Returns:
            The processed (and, when ``balance_col`` is non-empty, balanced)
            DataFrame.
        """
        target_file = processed_file or self.PROCESSED_File

        if os.path.exists(self.RAW_OUTPUT_CSV_FILENAME):
            # keep_default_na=False: the default NA markers are not parsed as NaN.
            data = pd.read_csv(self.RAW_OUTPUT_CSV_FILENAME, keep_default_na=False)
        else:
            self.logger.info("Cached data does not exist; pulling from DB (this might take a while)!")
            pull_data_query: str = "SELECT * FROM processed_table WHERE Taxability NOT IN ('IGNORE')"
            data = pd.DataFrame(self.data_access_layer.select(pull_data_query))
            # Cache the raw pull so later runs can skip the database.
            self.store_data(data, self.RAW_OUTPUT_CSV_FILENAME, FileType.csv)

        # clean text, filter taxability, encode taxability, encode reason code, concatenate textcols
        # NOTE(review): the line above is the original author's description of
        # what TextProcessing does; its implementation is not visible here.
        self.logger.info('processing data')
        # TODO: discuss support for data processing pipeline
        text_processing = TextProcessing(data,
                                         self.training_config.features,
                                         self.training_config.col_cat,
                                         self.training_config.balance_col)
        text_processing.preprocess_data()
        if self.training_config.balance_col != '':
            data = text_processing.balance_data()
        else:
            data = text_processing.data_df

        # Persist both a pickle and a CSV with the same base name.
        self.store_data(data, target_file, FileType.pickle)
        csv_file = f'{os.path.splitext(target_file)[0]}.csv'
        self.store_data(data, csv_file, FileType.csv)
        return data
Top comments (0)