Having a large amount of test data sometimes take a lot of effort, and to simulate a more realistic scenario, it’s good to have a large number of tables with distinct column types. This script generates random tables schema for Teradata.
Environment
Activate your virtualenv
pip install --upgrade virtualenv
python3 -m virtualenv --python python3 env
source ./env/bin/activate
Install the requirements for the metadata generator
pip install -r requirements.txt
Code
teradatasql |
import argparse | |
import logging | |
import random | |
import sys | |
import uuid | |
from teradatasql import connect | |
_DATA_TYPES = [ | |
'INTEGER', 'CHAR(25)', 'DECIMAL(8,2)', 'DATE', 'VARCHAR(25)', 'SMALLINT', | |
'CHAR', 'BYTEINT' | |
] | |
_COLUMN_NAMES = [ | |
'name', 'address', 'city', 'state', 'date_time', 'paragraph', 'randomdata', | |
'person', 'credit_card', 'size', 'reason', 'school', 'food', 'location', | |
'house', 'price', 'cpf', 'cnpj', 'passport', 'security_number', | |
'phone_number', 'bank_account_number', 'ip_address', 'stocks' | |
] | |
_TABLE_NAMES = [ | |
'school_info', 'personal_info', 'persons', 'employees', 'companies', | |
'store', 'home' | |
] | |
_DATABASE_NAMES = [ | |
'school_warehouse', 'company_warehouse', 'on_prem_warehouse', | |
'factory_warehouse', 'organization_warehouse' | |
] | |
def get_conn(connection_args): | |
return connect(None, | |
database=connection_args['database'], | |
host=connection_args['host'], | |
user=connection_args['user'], | |
password=connection_args['pass']) | |
def create_random_metadata(connection_args): | |
conn = get_conn(connection_args) | |
cursor = conn.cursor() | |
for x in range(4): | |
database_name, database_stmt = build_create_database_statement() | |
cursor.execute(database_stmt) | |
for y in range(250): | |
query = build_create_table_statement(database_name) | |
print('\n' + query) | |
cursor.execute(query) | |
conn.commit() | |
cursor.close() | |
def get_random_database_name(): | |
return random.choice(_DATABASE_NAMES) | |
def build_create_database_statement(): | |
database_name = '{}{}'.format(get_random_database_name(), | |
str(random.randint(1, 100000))) | |
database_stmt = 'CREATE DATABASE "{}" AS '.format( | |
database_name) | |
database_stmt += 'PERM = 2000000*(HASHAMP()+1), ' | |
database_stmt += 'SPOOL = 2000000*(HASHAMP()+1), ' | |
database_stmt += 'TEMPORARY = 2000000*(HASHAMP()+1) ' | |
return database_name, database_stmt | |
def get_random_data_type(): | |
return random.choice(_DATA_TYPES) | |
def get_random_column_name(): | |
return random.choice(_COLUMN_NAMES) | |
def get_random_table_name(): | |
return random.choice(_TABLE_NAMES) | |
def build_create_table_statement(database_name): | |
table_stmt = 'CREATE TABLE {}.{}{} ( '.format(database_name, | |
get_random_table_name(), | |
uuid.uuid4().hex[:8]) | |
table_stmt = '{}{}{} {}'.format(table_stmt, get_random_column_name(), | |
str(random.randint(1, 100000)), | |
get_random_data_type()) | |
for x in range(random.randint(1, 15)): | |
table_stmt += ', {}{}'.format(get_random_column_name(), | |
str(random.randint(1, 100000))) + \ | |
' {}'.format(get_random_data_type()) | |
table_stmt = '{} )'.format(table_stmt) | |
return table_stmt | |
def parse_args(): | |
parser = argparse.ArgumentParser( | |
description='Command line generate random metadata into teradata') | |
parser.add_argument('--teradata-host', | |
help='Your teradata server host', | |
required=True) | |
parser.add_argument('--teradata-user', | |
help='Your teradata credentials user') | |
parser.add_argument('--teradata-pass', | |
help='Your teradata credentials password') | |
parser.add_argument('--teradata-database', | |
help='Your teradata database name') | |
return parser.parse_args() | |
if __name__ == "__main__": | |
args = parse_args() | |
# Enable logging | |
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) | |
create_random_metadata({ | |
'database': args.teradata_database, | |
'host': args.teradata_host, | |
'user': args.teradata_user, | |
'pass': args.teradata_pass | |
}) |
Execution
export TERADATA_SERVER=127.0.0.1
export TERADATA_USERNAME=my_user
export TERADATA_PASSWORD=my_user_pwd
export TERADATA_DATABASE=my_db
python metadata_generator.py \
--teradata-host=$TERADATA_SERVER \
--teradata-user=$TERADATA_USERNAME \
--teradata-pass=$TERADATA_PASSWORD \
--teradata-database=$TERADATA_DATABASE
And that's it!
If you have difficulties, don’t hesitate reaching out. I would love to help you!
Top comments (1)
Hi,
How to extact DDL/DML files from teradata using python ? any approach that you would suggest ?
Also, is there any scripter available for teradata unlike mssql-scripter ? Is so, can please share the link and resources ?