YugabyteDB is an open-source distributed SQL database that is PostgreSQL-compatible. It combines the scale of NoSQL with the features of SQL, providing automatic sharding, replication, and geo-distribution.
What Is YugabyteDB?
YugabyteDB provides a PostgreSQL-compatible API (YSQL) and a Cassandra-compatible API (YCQL) in a single database. It uses a Google Spanner-inspired architecture with Raft consensus for strong consistency.
Key Features:
- PostgreSQL wire-compatible (YSQL)
- Cassandra-compatible (YCQL)
- Automatic sharding and rebalancing
- Synchronous replication (Raft)
- Geo-distributed deployments
- Row-level geo-partitioning
- Distributed transactions
- Change data capture (CDC)
Quick Start
# Docker: publish YSQL (5433), YCQL (9042), the tablet-server UI (9000) and
# the master UI/API (7000), which the Admin API examples below query.
docker run -d --name yugabyte \
  -p 5433:5433 -p 7000:7000 -p 9000:9000 -p 9042:9042 \
  yugabytedb/yugabyte:latest bin/yugabyted start --daemon=false
# Connect with psql
psql -h localhost -p 5433 -U yugabyte
# Tablet-server web UI at http://localhost:9000, master web UI at http://localhost:7000
YSQL API (PostgreSQL-Compatible)
import psycopg2

# Same psycopg2 driver as PostgreSQL!
conn = psycopg2.connect(
    host="localhost", port=5433,
    user="yugabyte", dbname="yugabyte",
)
try:
    # `with conn` commits when the block succeeds and rolls back if it raises;
    # `with conn.cursor()` closes the cursor on exit.
    with conn, conn.cursor() as cur:
        # Create table
        cur.execute("""
            CREATE TABLE IF NOT EXISTS products (
                id SERIAL PRIMARY KEY,
                name TEXT NOT NULL,
                price DECIMAL(10,2),
                category TEXT,
                created_at TIMESTAMPTZ DEFAULT NOW()
            )
        """)

        # Insert with RETURNING (values are passed as parameters, never
        # interpolated into the SQL string)
        cur.execute("""
            INSERT INTO products (name, price, category)
            VALUES (%s, %s, %s) RETURNING id
        """, ("Widget Pro", 29.99, "Hardware"))
        product_id = cur.fetchone()[0]
        print(f"Created product: {product_id}")

        # Full SQL including CTEs, window functions, JSON
        cur.execute("""
            WITH ranked AS (
                SELECT name, price, category,
                       ROW_NUMBER() OVER (PARTITION BY category ORDER BY price DESC) as rank
                FROM products
            )
            SELECT name, price, category FROM ranked WHERE rank <= 3
        """)
        for name, price, category in cur.fetchall():
            print(f"{name}: ${price} ({category})")
finally:
    # The connection context manager only ends the transaction; the
    # connection itself must still be closed explicitly.
    conn.close()
YCQL API (Cassandra-Compatible)
from cassandra.cluster import Cluster

cluster = Cluster(["localhost"], port=9042)
try:
    session = cluster.connect()

    # Create the keyspace on first run, then make it the session default.
    session.execute("""
        CREATE KEYSPACE IF NOT EXISTS myapp
        WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}
    """)
    session.set_keyspace("myapp")

    # One partition per user; rows within a partition are ordered newest first.
    session.execute("""
        CREATE TABLE IF NOT EXISTS events (
            user_id UUID,
            event_time TIMESTAMP,
            event_type TEXT,
            PRIMARY KEY (user_id, event_time)
        ) WITH CLUSTERING ORDER BY (event_time DESC)
    """)
finally:
    # Release the driver's connections and sessions even if a statement failed.
    cluster.shutdown()
YugabyteDB Admin API
import requests

MASTER_URL = "http://localhost:7000"
TSERVER_URL = "http://localhost:9000"
# requests applies no timeout by default, so an unreachable node would
# otherwise block the script indefinitely.
TIMEOUT_SECONDS = 10


def fetch(url):
    """GET *url* and return the response, raising on an HTTP error status."""
    response = requests.get(url, timeout=TIMEOUT_SECONDS)
    response.raise_for_status()
    return response


# Master API
cluster_config = fetch(f"{MASTER_URL}/api/v1/cluster-config").json()
# The value printed is the whole replication_info object, not a single number.
print(f"Replication info: {cluster_config.get('replication_info', {})}")

# Tablet server metrics
metrics = fetch(f"{TSERVER_URL}/metrics").text
print(metrics[:200])

# Table info
tables = fetch(f"{MASTER_URL}/api/v1/tables").json()
# NOTE(review): the payload is treated as a flat list of table objects; some
# releases appear to return an object grouping tables by category instead.
# Confirm against your server version. Both shapes are accepted here.
if isinstance(tables, dict):
    grouped = tables
    tables = []
    for group in grouped.values():
        if isinstance(group, list):
            tables.extend(group)
for table in tables:
    print(f"Table: {table['keyspace']}.{table['table_name']}")
Geo-Distribution
-- Create tablespace for geo-partitioning
-- The tablespace us_east asks for 3 replicas and lists one placement block
-- (cloud aws, region us-east-1, zone us-east-1a) with at least 1 replica.
CREATE TABLESPACE us_east WITH (
replica_placement = '{"num_replicas":3, "placement_blocks":[
{"cloud":"aws","region":"us-east-1","zone":"us-east-1a","min_num_replicas":1}]}'
);
-- Row-level geo-partitioning
-- The parent table is LIST-partitioned on region; region is also part of the
-- primary key alongside id.
CREATE TABLE users (
id UUID, region TEXT, name TEXT, email TEXT,
PRIMARY KEY (id, region)
) PARTITION BY LIST (region);
-- Rows whose region is 'us' go to the users_us partition, which is stored in
-- the us_east tablespace created above.
CREATE TABLE users_us PARTITION OF users FOR VALUES IN ('us')
TABLESPACE us_east;
Resources
- YugabyteDB Docs: https://docs.yugabyte.com
- YugabyteDB GitHub: https://github.com/yugabyte/yugabyte-db — 9K+ stars
Need to scrape web data? Check out my web scraping tools on Apify — production-ready actors for Reddit, Google Maps, and more. Questions? Email me at spinov001@gmail.com
Top comments (0)