DEV Community

loading...

Unit test using pytest

chhamanshu Dixit
・1 min read

Hi Team,
I am very new to pytest. Is there anyone who can help me write unit tests using pytest for the code below?

import os
import numpy as np
import pandas as pd
import re

from core.enums.filetype import FileType
from core.interfaces.trainingdataprovider import TrainingDataProvider
from tdm.constants import SqlStoreProcedures
from tdm.utils.textprocessing import TextProcessing

class LooDataProvider(TrainingDataProvider):
    """Provide training data from on-disk caches, falling back to the database.

    Lookup order used by :meth:`obtain_data`:

    1. the processed pickle for the configured experiment;
    2. the raw CSV cache (``RAW_OUTPUT_CSV_FILENAME``);
    3. a ``SELECT`` against ``processed_table`` via ``self.data_access_layer``.

    ``training_config``, ``logger``, ``data_access_layer`` and ``store_data``
    are not defined in this class; they are presumably supplied by
    ``TrainingDataProvider`` -- confirm against the base class.
    """

    # Directory prefix for processed artefacts. obtain_data() shadows this on
    # the instance with the full pickle path of the current experiment.
    PROCESSED_File: str = './data/processed/'
    # Cache of the unprocessed rows pulled from the database.
    RAW_OUTPUT_CSV_FILENAME: str = './data/processed/raw.csv'

    def obtain_data(self) -> pd.DataFrame:
        """Return the processed data set, building it if no pickle exists.

        The result is also stored on ``self.data``. After the call,
        ``self.PROCESSED_File`` holds the full path of the experiment pickle.
        """
        # Read the prefix from the class, not the instance: the instance
        # attribute is overwritten with the full path below, so building the
        # path from ``self.PROCESSED_File`` would yield
        # ``<dir><exp>.pkl<exp>.pkl`` on every call after the first.
        processed_dir: str = type(self).PROCESSED_File
        data_filename: str = f'{processed_dir}{self.training_config.EXPERIMENT_NAME}.pkl'

        # NOTE(review): unpickling executes arbitrary code; this is only safe
        # while the cache directory is trusted.
        self.data = pd.read_pickle(data_filename) if os.path.exists(data_filename) else None
        self.PROCESSED_File = data_filename
        if self.data is None:
            # NOTE(review): the log message looks truncated; kept unchanged.
            self.logger.debug('Processed ')
            self.data = self.__load_union_data()
        return self.data

    def __load_union_data(self) -> pd.DataFrame:
        """Load raw rows, preprocess them, and persist the processed result.

        Raw rows come from the CSV cache when it exists; otherwise they are
        pulled from the database and written to the CSV cache first. The
        processed frame is stored both as a pickle at ``self.PROCESSED_File``
        and as a CSV with the same base name.

        Expects ``self.PROCESSED_File`` to already hold the pickle path, as
        set by :meth:`obtain_data`.
        """
        if os.path.exists(self.RAW_OUTPUT_CSV_FILENAME):
            data = pd.read_csv(self.RAW_OUTPUT_CSV_FILENAME, keep_default_na=False)
        else:
            self.logger.info("Cached data does not exist; pulling from DB (this might take a while)!")
            pull_data_query: str = "SELECT * FROM processed_table WHERE Taxability NOT IN ('IGNORE')"
            data = pd.DataFrame(self.data_access_layer.select(pull_data_query))
            self.store_data(data, self.RAW_OUTPUT_CSV_FILENAME, FileType.csv)

        # Clean text, filter taxability, encode taxability, encode reason
        # code, concatenate text columns.
        self.logger.info('processing data')

        # TODO: discuss support for data processing pipeline
        text_processing = TextProcessing(data,
                                         self.training_config.features,
                                         self.training_config.col_cat,
                                         self.training_config.balance_col)
        text_processing.preprocess_data()
        # Balance only when a balance column is configured.
        if self.training_config.balance_col != '':
            data = text_processing.balance_data()
        else:
            data = text_processing.data_df

        self.store_data(data, self.PROCESSED_File, FileType.pickle)
        csv_file = f'{os.path.splitext(self.PROCESSED_File)[0]}.csv'
        self.store_data(data, csv_file, FileType.csv)

        return data
Enter fullscreen mode Exit fullscreen mode

Discussion (0)