DEV Community


Unit test using pytest

chhamanshu Dixit
・1 min read

Hi Team,
I am very new to pytest. Is there anyone who can help me write unit tests using pytest for the code below?

import os
import numpy as np
import pandas as pd
import re

from core.enums.filetype import FileType
from core.interfaces.trainingdataprovider import TrainingDataProvider
from tdm.constants import SqlStoreProcedures
from tdm.utils.textprocessing import TextProcessing

class LooDataProvider(TrainingDataProvider):
# Training-data provider that keeps on-disk copies of its data: a pickle whose
# path is built from PROCESSED_File, and a raw CSV at RAW_OUTPUT_CSV_FILENAME.
# NOTE(review): this paste lost one level of indentation (the class body sits
# at column 0) and several lines below are visibly truncated. Each gap is
# marked with a NOTE(review); the missing text has to be restored from the
# original file before this class can run or be unit-tested.

# Directory prefix used to build the processed pickle path. obtain_data()
# overwrites it on the instance with the full pickle path.
PROCESSED_File: str = f'./data/processed/'
# Path of the cached raw CSV that __load_union_data() reads.
RAW_OUTPUT_CSV_FILENAME: str = './data/processed/raw.csv'

def obtain_data(self) -> pd.DataFrame:
    # Builds '<PROCESSED_File><EXPERIMENT_NAME>.pkl' and reads it with
    # pd.read_pickle when that file exists, otherwise yields None.
    # NOTE(review): the next line contains two '=' with no assignment target
    # between them -- presumably a second statement (`data = ...`) was fused
    # onto it by the paste; confirm against the original file.
    data_filename: str = f'{self.PROCESSED_File}{self.training_config.EXPERIMENT_NAME}.pkl' = pd.read_pickle(data_filename) if os.path.exists(data_filename) else None
    # Repoints PROCESSED_File at the pickle path; __load_union_data() passes
    # this attribute to store_data() as the pickle output path.
    self.PROCESSED_File = data_filename
    # NOTE(review): the name being tested is missing from this condition --
    # presumably the value loaded from the pickle above.
    if is None:
        # NOTE(review): the log message looks cut short and the assignment
        # target is missing -- presumably the result of __load_union_data()
        # is meant to be stored in the same variable; confirm.
        self.logger.debug('Processed ') = self.__load_union_data()
    # NOTE(review): no return statement is visible here although the signature
    # declares a pd.DataFrame result -- confirm whether one was lost.

def __load_union_data(self) -> pd.DataFrame:
    # Loads the raw data, processes it and stores the result.
    # Reads the raw CSV when the file exists (data is None otherwise).
    # keep_default_na=False is passed to pd.read_csv, which turns off pandas'
    # default set of NaN marker strings.
    data: pd.DataFrame = pd.read_csv(self.RAW_OUTPUT_CSV_FILENAME, keep_default_na=False)\
        if os.path.exists(self.RAW_OUTPUT_CSV_FILENAME) else None

    # NOTE(review): the call that should precede the string literal on the
    # next line (presumably a logger call) is missing from the paste.
    if data is None:"Cached data does not exist; pulling from DB (this might take a while)!")
        # Query selecting every row of processed_table whose Taxability is
        # not 'IGNORE'.
        pull_data_query: str = "SELECT * FROM processed_table WHERE Taxability NOT IN ('IGNORE')"
        # NOTE(review): the DataFrame constructor call is cut off; whatever
        # executes pull_data_query is not visible here.
        data = pd.DataFrame(
        # Hands the data to store_data() with the raw CSV path and
        # FileType.csv -- presumably writes the CSV cache; verify store_data.
        self.store_data(data, self.RAW_OUTPUT_CSV_FILENAME, FileType.csv)

    # NOTE(review): the trailing `'processing data')` on the next line looks
    # like the remainder of a lost logging call that got fused into the comment.
    # clean text, filter taxability, encode taxability, encode reason code, concatenate textcols'processing data')

    #TODO: discuss support for data processing pipeline
    # NOTE(review): the TextProcessing(...) call is cut off after its first
    # argument; the remaining arguments are not visible.
    text_processing = TextProcessing(data,
    # Only runs when training_config.balance_col is a non-empty string.
    if self.training_config.balance_col != '':
        data = text_processing.balance_data()
        # NOTE(review): as pasted, this immediately overwrites the value
        # returned by balance_data(); it may originally have sat outside the
        # `if` or in an `else` branch -- confirm the intended indentation.
        data = text_processing.data_df

    # Stores the data twice via store_data(): once at self.PROCESSED_File as
    # FileType.pickle, and once as FileType.csv at the same path with its
    # extension swapped for '.csv'.
    # NOTE(review): PROCESSED_File is only the full pickle path after
    # obtain_data() has reassigned it; before that it is the directory prefix.
    self.store_data(data, self.PROCESSED_File, FileType.pickle)
    csv_file = f'{os.path.splitext(self.PROCESSED_File)[0]}.csv'
    self.store_data(data, csv_file, FileType.csv)

    return data
Enter fullscreen mode Exit fullscreen mode

Discussion (0)