DEV Community

郑沛沛
郑沛沛

Posted on

PostgreSQL JSONB: When to Use It and How to Query It Like a Pro

PostgreSQL's JSONB gives you the flexibility of a document database with the reliability of SQL. Here's when and how to use it effectively.

When to Use JSONB

Good use cases:

  • User preferences and settings
  • API response caching
  • Event metadata with varying schemas
  • Feature flags and configuration

Bad use cases:

  • Core business data you query frequently
  • Data with strict relationships
  • Anything that needs foreign key constraints

Creating Tables with JSONB

CREATE TABLE events (
    id SERIAL PRIMARY KEY,
    event_type VARCHAR(50) NOT NULL,
    payload JSONB NOT NULL DEFAULT '{}',
    metadata JSONB,
    created_at TIMESTAMP DEFAULT NOW()
);

INSERT INTO events (event_type, payload, metadata) VALUES
('user.signup', '{"user_id": 123, "email": "alice@example.com", "plan": "pro"}', '{"ip": "192.168.1.1", "browser": "Chrome"}'),
('order.created', '{"order_id": 456, "items": [{"sku": "A1", "qty": 2}, {"sku": "B2", "qty": 1}], "total": 99.99}', '{"source": "web"}'),
('user.login', '{"user_id": 123, "method": "oauth", "provider": "google"}', '{"ip": "10.0.0.1"}');
Enter fullscreen mode Exit fullscreen mode

Querying JSONB

Basic Access

-- Arrow operator (returns JSON)
SELECT payload->'email' FROM events WHERE event_type = 'user.signup';

-- Double arrow (returns text)
SELECT payload->>'email' FROM events WHERE event_type = 'user.signup';

-- Nested access
SELECT payload->'items'->0->>'sku' FROM events WHERE event_type = 'order.created';
Enter fullscreen mode Exit fullscreen mode

Filtering

-- Filter by JSONB value
SELECT * FROM events WHERE payload->>'user_id' = '123';

-- Cast for numeric comparison
SELECT * FROM events WHERE (payload->>'total')::numeric > 50;

-- Check if key exists
SELECT * FROM events WHERE payload ? 'email';

-- Check if JSONB contains subset
SELECT * FROM events WHERE payload @> '{"plan": "pro"}';
Enter fullscreen mode Exit fullscreen mode

Array Operations

-- Query inside JSON arrays
SELECT * FROM events
WHERE payload->'items' @> '[{"sku": "A1"}]';

-- Expand array elements
SELECT id, item
FROM events, jsonb_array_elements(payload->'items') AS item
WHERE event_type = 'order.created';
Enter fullscreen mode Exit fullscreen mode

Indexing JSONB

-- GIN index: supports @>, ?, ?|, ?& operators
CREATE INDEX idx_events_payload ON events USING GIN(payload);

-- Specific path index (smaller, faster for known queries)
CREATE INDEX idx_events_user_id ON events ((payload->>'user_id'));
CREATE INDEX idx_events_plan ON events ((payload->>'plan'));

-- GIN with jsonb_path_ops (smaller index, only supports @>)
CREATE INDEX idx_events_payload_path ON events USING GIN(payload jsonb_path_ops);
Enter fullscreen mode Exit fullscreen mode

JSONB Functions

-- Merge/update JSONB
UPDATE events
SET payload = payload || '{"verified": true}'
WHERE payload->>'user_id' = '123';

-- Remove a key
UPDATE events
SET payload = payload - 'temporary_field';

-- Set nested value
UPDATE events
SET payload = jsonb_set(payload, '{address,city}', '"New York"')
WHERE id = 1;

-- Pretty print
SELECT jsonb_pretty(payload) FROM events LIMIT 1;

-- Get all keys
SELECT DISTINCT jsonb_object_keys(payload) FROM events;
Enter fullscreen mode Exit fullscreen mode

Python with SQLAlchemy

from sqlalchemy import Column, Integer, String, DateTime, func
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import DeclarativeBase

class Base(DeclarativeBase):
    pass

class Event(Base):
    __tablename__ = "events"
    id = Column(Integer, primary_key=True)
    event_type = Column(String(50), nullable=False)
    payload = Column(JSONB, nullable=False, default={})
    created_at = Column(DateTime, server_default=func.now())

# Query JSONB fields
from sqlalchemy import cast, String

# Filter by JSONB value
events = session.query(Event).filter(
    Event.payload["user_id"].astext == "123"
).all()

# Contains operator
pro_signups = session.query(Event).filter(
    Event.payload.contains({"plan": "pro"})
).all()

# Insert with JSONB
new_event = Event(
    event_type="user.signup",
    payload={"user_id": 789, "email": "bob@example.com", "plan": "free"}
)
session.add(new_event)
session.commit()
Enter fullscreen mode Exit fullscreen mode

Performance Tips

-- Check if your queries use indexes
EXPLAIN ANALYZE SELECT * FROM events WHERE payload @> '{"plan": "pro"}';

-- TOAST compression: large JSONB values are compressed automatically
-- But keep individual JSONB documents under 1MB for best performance

-- Use specific columns for frequently queried fields
ALTER TABLE events ADD COLUMN user_id INT GENERATED ALWAYS AS ((payload->>'user_id')::int) STORED;
CREATE INDEX idx_events_user_id_col ON events(user_id);
Enter fullscreen mode Exit fullscreen mode

Key Takeaways

  1. JSONB is great for semi-structured, varying data
  2. Use GIN indexes for JSONB containment queries
  3. Expression indexes for specific frequently-queried paths
  4. @> (contains) is the most index-friendly operator
  5. Use generated columns for frequently queried JSONB fields

6. Don't use JSONB for everything — structured data belongs in regular columns

🚀 Level up your AI workflow! Check out my AI Developer Mega Prompt Pack — 80 battle-tested prompts for developers. $9.99

Top comments (0)