Populating data
First of all make sure that your user has SUPERUSER privileges.
You can set it using:
ALTER ROLE <user> WITH SUPERUSER;
Here we'll see three ways to populate our data
- using inbuilt csv module
- using SQL CALL
- using pandas
1. Using CSV module
import psycopg2
import csv

# Connect to the local "store" database. Credentials are hard-coded for the
# demo only -- in real code, read them from the environment or a config file.
connection = psycopg2.connect(
    database="store",
    user="postgres",
    password="lgwmfpsc",
    host="localhost",
    port="5432",  # default port
)
cursor = connection.cursor()

# Recreate the table from scratch so the script can be re-run safely.
cursor.execute("DROP TABLE IF EXISTS superstore;")
connection.commit()

create_query = '''CREATE TABLE superstore (
    id INT PRIMARY KEY,
    ship_model VARCHAR(255) NOT NULL,
    segment VARCHAR(255) NOT NULL,
    country VARCHAR(255) NOT NULL,
    city VARCHAR(255) NOT NULL,
    state VARCHAR(255) NOT NULL,
    postal_code INT NOT NULL,
    region VARCHAR(255) NOT NULL,
    category VARCHAR(255) NOT NULL,
    subcategory VARCHAR(255) NOT NULL,
    sales FLOAT NOT NULL,
    quantity INT NOT NULL,
    discount FLOAT NOT NULL,
    profit FLOAT NOT NULL
)
'''
cursor.execute(create_query)
connection.commit()

# Hoist the (invariant) INSERT statement out of the loop.
insert_query = '''INSERT INTO superstore (id, ship_model, segment, country, city, state, postal_code,
    region, category, subcategory, sales, quantity, discount, profit)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'''

# newline='' is required by the csv module so embedded newlines in quoted
# fields are handled correctly.
with open('./SampleSuperstore.csv', newline='') as file:
    csv_data = csv.reader(file)
    next(csv_data)  # skip the header row
    # enumerate supplies a 1-based surrogate key; the original used a manual
    # counter named `id`, which shadows the builtin.
    for row_id, row in enumerate(csv_data, start=1):
        cursor.execute(insert_query, tuple([row_id] + row))
connection.commit()

# Read a few rows back to confirm the import worked.
cursor.execute("SELECT * FROM superstore LIMIT 10")
print(cursor.fetchall())
connection.close()
Before iterating through the rows, next() is used to skip the first line of the file, since it contains the column headers.
2. Using SQL CALL
import psycopg2

# Connect to the local "store" database (demo credentials only).
connection = psycopg2.connect(
    user='postgres',
    password='lgwmfpsc',
    host='localhost',
    database='store',
    port='5432',
)
cursor = connection.cursor()

cursor.execute("DROP TABLE IF EXISTS superstore")

# SERIAL is PostgreSQL's auto-incrementing integer column. The original
# `id INT(255) NOT NULL AUTO_INCREMENT` is MySQL syntax and is rejected
# by PostgreSQL.
create_query = '''CREATE TABLE superstore (
    id SERIAL PRIMARY KEY,
    ship_model VARCHAR(255) NOT NULL,
    segment VARCHAR(255) NOT NULL,
    country VARCHAR(255) NOT NULL,
    city VARCHAR(255) NOT NULL,
    state VARCHAR(255) NOT NULL,
    postal_code INT NOT NULL,
    region VARCHAR(255) NOT NULL,
    category VARCHAR(255) NOT NULL,
    subcategory VARCHAR(255) NOT NULL,
    sales FLOAT NOT NULL,
    quantity INT NOT NULL,
    discount FLOAT NOT NULL,
    profit FLOAT NOT NULL
)'''
cursor.execute(create_query)

# `LOAD DATA LOCAL INFILE` is MySQL-only; PostgreSQL bulk-loads with COPY.
# copy_expert streams the file from the client side, so this also works
# without SUPERUSER privileges (server-side COPY FROM <file> needs them).
copy_query = '''COPY superstore (ship_model, segment, country, city, state, postal_code,
    region, category, subcategory, sales, quantity, discount, profit)
    FROM STDIN WITH (FORMAT csv, HEADER true)'''
with open('/home/sapan/codes/Linkedin-Python/database-python/csv-database/SampleSuperstore.csv') as file:
    cursor.copy_expert(copy_query, file)
connection.commit()

# Read a few rows back to confirm the import worked.
cursor.execute("SELECT * FROM superstore LIMIT 10")
print(cursor.fetchall())
connection.close()
3. Using pandas
# Using pandas together with SQLAlchemy.
import pandas as pd
from sqlalchemy import Column, Integer, String, Float
from sqlalchemy import create_engine, func
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base

# The dialect name is "postgresql"; the old "postgres://" alias was removed
# in SQLAlchemy 1.4 and raises NoSuchModuleError.
engine = create_engine('postgresql://postgres:lgwmfpsc@localhost/store')

Base = declarative_base()


class SalesReport(Base):
    """ORM mapping for the salesreport table (one row per location)."""

    __tablename__ = 'salesreport'

    id = Column(Integer, primary_key=True)
    country = Column("Country", String(length=50))
    state = Column("State", String(length=50))
    city = Column("City", String(length=50))
    postalcode = Column("Postal Code", String(length=50))

    def __repr__(self):
        # The original formatted self.country into the id field.
        return (f"<SalesReport(id='{self.id}', country='{self.country}', "
                f"state='{self.state}', postal_code='{self.postalcode}')>")


Base.metadata.create_all(engine)

file_name = './sample_data.csv'
data_frame = pd.read_csv(file_name)

# to_sql writes the DataFrame straight into the database, so no session is
# needed for the import itself.
data_frame.to_sql(
    con=engine,
    name=SalesReport.__tablename__,
    if_exists='append',
    index=False,
)

# A session is only needed to read the data back and prove the import worked.
session = sessionmaker()
session.configure(bind=engine)
s = session()

results = s.query(SalesReport).limit(10).all()
for i, r in enumerate(results):
    print(i, r)
print()

# .scalar() must be *called*: without the parentheses the original bound the
# method object itself, so print showed "<bound method ...>" instead of the
# maximum postal code.
overall_max = s.query(func.max(SalesReport.postalcode)).scalar()
print(overall_max)

results = s.query(SalesReport).order_by(SalesReport.postalcode.desc()).limit(10)
for row in results:
    # The original printed the whole query object on every iteration instead
    # of the current row.
    print(row)
In this post we only saw how to populate our data. If you want complete details on how to use PostgreSQL with Python, refer to this article.
https://sapanravidas.medium.com/postgres-with-python3-ec3cafc5c98d
Top comments (0)