Part 1: Setup Without Docker (Traditional Method)
Why Cassandra with Django?
Before we dive in, let's understand why you'd want to use Cassandra with Django:
Cassandra strengths:
- Handles massive write-heavy workloads
- Linear scalability (add nodes, get more capacity)
- No single point of failure
- Perfect for time-series data, logs, IoT data
Django + Cassandra use cases:
- Analytics platforms
- Real-time messaging systems
- IoT data collection
- Event logging systems
- Social media feeds
Important note: Cassandra is NOT a replacement for PostgreSQL/MySQL for typical Django apps. Use it for specific high-scale scenarios.
Installing Cassandra Locally
On Ubuntu/Debian
# Store the Apache Cassandra signing keys in a dedicated keyring file.
# (apt-key is deprecated and missing from recent Debian/Ubuntu releases.)
sudo mkdir -p /etc/apt/keyrings
sudo curl -o /etc/apt/keyrings/apache-cassandra.asc https://downloads.apache.org/cassandra/KEYS
# Add Cassandra repository (41x = the 4.1 release series), trusted only via that keyring
echo "deb [signed-by=/etc/apt/keyrings/apache-cassandra.asc] https://debian.cassandra.apache.org 41x main" | sudo tee -a /etc/apt/sources.list.d/cassandra.sources.list
# Update and install
sudo apt-get update
sudo apt-get install cassandra
# Start Cassandra
sudo systemctl start cassandra
sudo systemctl enable cassandra
# Verify installation
nodetool status
On macOS
# Install via Homebrew
brew install cassandra
# Start Cassandra
brew services start cassandra
# Verify
cqlsh
On Windows
- Download Apache Cassandra from official website
- Extract to C:\cassandra
- Set JAVA_HOME environment variable
- Run C:\cassandra\bin\cassandra.bat
Setting Up Django Project
Step 1: Install Required Packages
# Create virtual environment
python -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
# Install Django and Cassandra driver
pip install django
pip install cassandra-driver
pip install django-cassandra-engine
# Create requirements.txt
pip freeze > requirements.txt
Your requirements.txt should include:
Django==4.2.7
cassandra-driver==3.28.0
django-cassandra-engine==1.8.0
Step 2: Create Django Project
django-admin startproject myproject
cd myproject
python manage.py startapp myapp
Step 3: Configure Django Settings
Open myproject/settings.py:
# Place this import with the other imports at the top of settings.py.
from cassandra import ConsistencyLevel

# Add cassandra engine to installed apps
INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'django_cassandra_engine',
    'myapp',
]

# Configure database connections.
# SQLite stays the default database for Django's built-in apps (auth,
# sessions, admin); Cassandra is a second connection for the Cassandra models.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': BASE_DIR / 'db.sqlite3',
    },
    'cassandra': {
        'ENGINE': 'django_cassandra_engine',
        'NAME': 'myapp_keyspace',  # Cassandra keyspace name
        'HOST': '127.0.0.1',
        'PORT': 9042,
        'OPTIONS': {
            # Single-node development settings; use NetworkTopologyStrategy
            # and a higher replication factor for a real cluster.
            'replication': {
                'strategy_class': 'SimpleStrategy',
                'replication_factor': 1,
            },
            'connection': {
                # Must be a ConsistencyLevel constant: the driver serializes
                # it as an integer, so the string 'ONE' fails at query time.
                'consistency': ConsistencyLevel.ONE,
                'retry_connect': True,
                'idle_heartbeat_interval': 30,
                'idle_heartbeat_timeout': 10,
            },
        },
    },
}

# Database router for the Cassandra connection.
# NOTE(review): confirm that this router class exists in the installed
# django-cassandra-engine version. DjangoCassandraModel classes presumably
# locate the Cassandra connection by its ENGINE, in which case no router is
# needed and this setting can be dropped.
DATABASE_ROUTERS = ['django_cassandra_engine.routers.CassandraRouter']
Step 4: Create Cassandra Models
Create myapp/models.py:
import uuid

# Column types come from the DataStax driver's cqlengine package;
# django-cassandra-engine only supplies the model base class.
# (There is no `cassandra.cql` module; TimeUUID columns are available as
# `columns.TimeUUID`.)
from cassandra.cqlengine import columns
from django_cassandra_engine.models import DjangoCassandraModel
class UserActivity(DjangoCassandraModel):
    """
    Store user activity logs in Cassandra.

    Each row is identified by the (user_id, timestamp) pair, which makes
    the table a good fit for per-user time-series data.
    """
    # First primary-key column (presumably the partition key, following the
    # cqlengine convention): all activities of one user are stored together.
    user_id = columns.UUID(primary_key=True, default=uuid.uuid4)
    # Second primary-key column; DESC ordering returns the newest rows first.
    timestamp = columns.DateTime(primary_key=True, clustering_order="DESC")
    activity_type = columns.Text()
    description = columns.Text()
    # Free-form text-to-text attributes attached to the activity.
    metadata = columns.Map(columns.Text, columns.Text)

    class Meta:
        # Names the column Django-facing code should treat as the "pk";
        # presumably required because the model has a compound primary key.
        get_pk_field = 'user_id'
class SensorData(DjangoCassandraModel):
    """
    IoT sensor data example.

    Each row is one reading, identified by (sensor_id, recorded_at).
    """
    # First primary-key column (presumably the partition key): readings are
    # grouped per sensor.
    sensor_id = columns.Text(primary_key=True)
    # Second primary-key column; DESC ordering returns the newest rows first.
    recorded_at = columns.DateTime(primary_key=True, clustering_order="DESC")
    temperature = columns.Float()
    humidity = columns.Float()
    location = columns.Text()

    class Meta:
        # Names the column Django-facing code should treat as the "pk".
        get_pk_field = 'sensor_id'
class ChatMessage(DjangoCassandraModel):
    """
    Chat/messaging system.

    Each row is one message, identified by (room_id, message_id).
    """
    # First primary-key column (presumably the partition key): messages are
    # grouped per chat room.
    room_id = columns.UUID(primary_key=True)
    # Time-based UUID (uuid1) as the second primary-key column; DESC ordering
    # returns the newest messages first.
    message_id = columns.TimeUUID(primary_key=True, default=uuid.uuid1, clustering_order="DESC")
    user_id = columns.UUID()
    username = columns.Text()
    message = columns.Text()
    timestamp = columns.DateTime()

    class Meta:
        # Names the column Django-facing code should treat as the "pk".
        get_pk_field = 'room_id'
Step 5: Sync Models with Cassandra
# Create keyspace and tables
python manage.py sync_cassandra
# Verify in CQL shell
cqlsh
# Check keyspace
USE myapp_keyspace;
# Show tables
DESCRIBE TABLES;
# Check table structure
DESCRIBE TABLE user_activity;
Step 6: Create Views to Use Cassandra Models
Create myapp/views.py:
import json
import uuid
from datetime import datetime, timezone

from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt

from .models import UserActivity, SensorData, ChatMessage
@csrf_exempt
def log_activity(request):
    """Log user activity.

    Expects a POST request whose JSON body contains ``user_id`` (a UUID
    string), ``activity_type``, ``description`` and, optionally, a
    ``metadata`` object.

    Returns:
        JsonResponse: ``{'status': 'success', 'activity_id': ...}`` on
        success, a 400 response when the body is malformed, or a 405
        response for any method other than POST.
    """
    # A Django view must always return a response; without this guard a
    # non-POST request fell through and returned None.
    if request.method != 'POST':
        return JsonResponse({'error': 'Method not allowed'}, status=405)

    try:
        data = json.loads(request.body)
        fields = {
            'user_id': uuid.UUID(data['user_id']),
            'activity_type': data['activity_type'],
            'description': data['description'],
            'metadata': data.get('metadata', {}),
        }
    except (AttributeError, KeyError, TypeError, ValueError):
        # ValueError: invalid JSON or UUID text; KeyError: missing field;
        # TypeError/AttributeError: body or user_id has the wrong JSON type.
        return JsonResponse({'error': 'Invalid request body'}, status=400)

    # The driver treats naive datetimes as UTC, so record an explicit UTC
    # instant instead of the server's local wall-clock time.
    activity = UserActivity.create(
        timestamp=datetime.now(timezone.utc),
        **fields,
    )
    return JsonResponse({
        'status': 'success',
        'activity_id': str(activity.user_id)
    })
def get_user_activities(request, user_id):
    """Get up to 100 activities for a user.

    Args:
        request: The incoming HTTP request.
        user_id: The user's UUID as a string, taken from the URL.

    Returns:
        JsonResponse: ``{'activities': [...]}``, or a 400 response when
        ``user_id`` is not a valid UUID.
    """
    try:
        user_uuid = uuid.UUID(user_id)
    except ValueError:
        # The URL converter accepts any string, so reject bad IDs here
        # instead of letting them surface as a 500 error.
        return JsonResponse({'error': 'Invalid user_id'}, status=400)

    activities = UserActivity.objects.filter(user_id=user_uuid).limit(100)
    results = [{
        'user_id': str(a.user_id),
        'timestamp': a.timestamp.isoformat(),
        'activity_type': a.activity_type,
        'description': a.description,
        'metadata': a.metadata
    } for a in activities]
    return JsonResponse({'activities': results})
@csrf_exempt
def save_sensor_data(request):
    """Save IoT sensor data.

    Expects a POST request whose JSON body contains ``sensor_id``,
    ``temperature``, ``humidity`` and ``location``.

    Returns:
        JsonResponse: ``{'status': 'success'}`` on success, a 400 response
        when the body is malformed, or a 405 response for any method other
        than POST.
    """
    # A Django view must always return a response; without this guard a
    # non-POST request fell through and returned None.
    if request.method != 'POST':
        return JsonResponse({'error': 'Method not allowed'}, status=405)

    try:
        data = json.loads(request.body)
        fields = {
            'sensor_id': data['sensor_id'],
            'temperature': data['temperature'],
            'humidity': data['humidity'],
            'location': data['location'],
        }
    except (KeyError, TypeError, ValueError):
        # ValueError: invalid JSON; KeyError: missing field;
        # TypeError: body is not a JSON object.
        return JsonResponse({'error': 'Invalid request body'}, status=400)

    # The driver treats naive datetimes as UTC, so record an explicit UTC
    # instant instead of the server's local wall-clock time.
    SensorData.create(
        recorded_at=datetime.now(timezone.utc),
        **fields,
    )
    return JsonResponse({'status': 'success'})
def get_sensor_readings(request, sensor_id):
    """Return up to 50 readings of one sensor as a JSON list."""
    queryset = SensorData.objects.filter(sensor_id=sensor_id).limit(50)

    payload = []
    for reading in queryset:
        # Datetimes are not JSON-serializable as-is, so emit ISO-8601 text.
        payload.append({
            'sensor_id': reading.sensor_id,
            'recorded_at': reading.recorded_at.isoformat(),
            'temperature': reading.temperature,
            'humidity': reading.humidity,
            'location': reading.location,
        })

    return JsonResponse({'readings': payload})
@csrf_exempt
def send_message(request):
    """Send chat message.

    Expects a POST request whose JSON body contains ``room_id`` and
    ``user_id`` (UUID strings), ``username`` and ``message``.

    Returns:
        JsonResponse: ``{'status': 'success'}`` on success, a 400 response
        when the body is malformed, or a 405 response for any method other
        than POST.
    """
    # A Django view must always return a response; without this guard a
    # non-POST request fell through and returned None.
    if request.method != 'POST':
        return JsonResponse({'error': 'Method not allowed'}, status=405)

    try:
        data = json.loads(request.body)
        fields = {
            'room_id': uuid.UUID(data['room_id']),
            'user_id': uuid.UUID(data['user_id']),
            'username': data['username'],
            'message': data['message'],
        }
    except (AttributeError, KeyError, TypeError, ValueError):
        # ValueError: invalid JSON or UUID text; KeyError: missing field;
        # TypeError/AttributeError: body or an ID has the wrong JSON type.
        return JsonResponse({'error': 'Invalid request body'}, status=400)

    # The driver treats naive datetimes as UTC, so record an explicit UTC
    # instant instead of the server's local wall-clock time.
    ChatMessage.create(
        message_id=uuid.uuid1(),
        timestamp=datetime.now(timezone.utc),
        **fields,
    )
    return JsonResponse({'status': 'success'})
def get_room_messages(request, room_id):
    """Get up to 100 messages from a chat room.

    Args:
        request: The incoming HTTP request.
        room_id: The room's UUID as a string, taken from the URL.

    Returns:
        JsonResponse: ``{'messages': [...]}``, or a 400 response when
        ``room_id`` is not a valid UUID.
    """
    try:
        room_uuid = uuid.UUID(room_id)
    except ValueError:
        # The URL converter accepts any string, so reject bad IDs here
        # instead of letting them surface as a 500 error.
        return JsonResponse({'error': 'Invalid room_id'}, status=400)

    messages = ChatMessage.objects.filter(room_id=room_uuid).limit(100)
    results = [{
        'message_id': str(m.message_id),
        'user_id': str(m.user_id),
        'username': m.username,
        'message': m.message,
        'timestamp': m.timestamp.isoformat()
    } for m in messages]
    return JsonResponse({'messages': results})
Step 7: Configure URLs
Create myapp/urls.py:
from django.urls import path
from . import views

# URL routes for the Cassandra-backed API views.
# Django tries patterns in order, so each fixed route ('activity/log/',
# 'sensor/save/') must stay above the catch-all <str:...> route that shares
# its prefix; otherwise "log" or "save" would be captured as an ID.
urlpatterns = [
    # User activity endpoints
    path('activity/log/', views.log_activity),
    path('activity/<str:user_id>/', views.get_user_activities),
    # Sensor data endpoints
    path('sensor/save/', views.save_sensor_data),
    path('sensor/<str:sensor_id>/', views.get_sensor_readings),
    # Chat endpoints
    path('chat/send/', views.send_message),
    path('chat/room/<str:room_id>/', views.get_room_messages),
]
Update myproject/urls.py:
from django.contrib import admin
from django.urls import path, include
urlpatterns = [
path('admin/', admin.site.urls),
path('api/', include('myapp.urls')),
]
Step 8: Test Your Setup
# Run Django development server
python manage.py runserver
# Test endpoints using curl
# Log activity
curl -X POST http://localhost:8000/api/activity/log/ \
-H "Content-Type: application/json" \
-d '{
"user_id": "550e8400-e29b-41d4-a716-446655440000",
"activity_type": "login",
"description": "User logged in",
"metadata": {"ip": "192.168.1.1"}
}'
# Get user activities
curl http://localhost:8000/api/activity/550e8400-e29b-41d4-a716-446655440000/
# Save sensor data
curl -X POST http://localhost:8000/api/sensor/save/ \
-H "Content-Type: application/json" \
-d '{
"sensor_id": "SENSOR001",
"temperature": 25.5,
"humidity": 60.0,
"location": "Room 101"
}'
Part 2: Setup With Docker
Docker makes Cassandra setup much cleaner and reproducible. Let's containerize everything.
Step 1: Create Docker Configuration Files
Create docker-compose.yml in your project root:
version: '3.8'

services:
  cassandra:
    image: cassandra:4.1
    container_name: cassandra_db
    ports:
      # CQL native transport. The legacy Thrift port (9160) is not mapped
      # because Thrift was removed in Cassandra 4.0.
      - "9042:9042"
    environment:
      - CASSANDRA_CLUSTER_NAME=MyCluster
      - CASSANDRA_DC=datacenter1
      - CASSANDRA_RACK=rack1
      - CASSANDRA_ENDPOINT_SNITCH=GossipingPropertyFileSnitch
      - CASSANDRA_NUM_TOKENS=128
    volumes:
      - cassandra_data:/var/lib/cassandra
    healthcheck:
      # Healthy only once the node actually answers CQL queries.
      test: ["CMD-SHELL", "cqlsh -e 'describe cluster'"]
      interval: 30s
      timeout: 10s
      retries: 5
    networks:
      - django_network

  django:
    build: .
    container_name: django_app
    # `command` replaces the image's CMD, so the schema sync has to be
    # repeated here; otherwise the keyspace and tables are never created.
    command: sh -c "python manage.py sync_cassandra && python manage.py runserver 0.0.0.0:8000"
    volumes:
      - .:/app
    ports:
      - "8000:8000"
    depends_on:
      cassandra:
        # Wait for the healthcheck above, not just for container start.
        condition: service_healthy
    environment:
      - CASSANDRA_HOST=cassandra
      - CASSANDRA_PORT=9042
    networks:
      - django_network

volumes:
  cassandra_data:

networks:
  django_network:
    driver: bridge
Step 2: Create Dockerfile
Create Dockerfile in project root:
FROM python:3.11-slim
# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
# Set work directory
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
gcc \
python3-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies
COPY requirements.txt /app/
RUN pip install --upgrade pip
RUN pip install -r requirements.txt
# Copy project
COPY . /app/
# Run migrations and start server
CMD ["sh", "-c", "python manage.py sync_cassandra && python manage.py runserver 0.0.0.0:8000"]
Step 3: Update Django Settings for Docker
Update myproject/settings.py:
import os

from cassandra import ConsistencyLevel

# ... existing settings ...

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': BASE_DIR / 'db.sqlite3',
    },
    'cassandra': {
        'ENGINE': 'django_cassandra_engine',
        'NAME': 'myapp_keyspace',
        # Host and port are read from the environment set in the compose
        # file, falling back to a local Cassandra outside Docker.
        'HOST': os.environ.get('CASSANDRA_HOST', '127.0.0.1'),
        'PORT': int(os.environ.get('CASSANDRA_PORT', 9042)),
        'OPTIONS': {
            'replication': {
                'strategy_class': 'SimpleStrategy',
                'replication_factor': 1,
            },
            'connection': {
                # Must be a ConsistencyLevel constant: the driver serializes
                # it as an integer, so the string 'ONE' fails at query time.
                'consistency': ConsistencyLevel.ONE,
                'retry_connect': True,
                'idle_heartbeat_interval': 30,
                'idle_heartbeat_timeout': 10,
            },
        },
    },
}
Step 4: Create .dockerignore
Create .dockerignore:
__pycache__
*.pyc
*.pyo
*.pyd
.Python
env/
venv/
.env
.git
.gitignore
*.sqlite3
*.log
Step 5: Build and Run with Docker
# Build and start all services
docker-compose up --build
# In another terminal, check Cassandra is ready
docker exec -it cassandra_db cqlsh
# Verify keyspace and tables
USE myapp_keyspace;
DESCRIBE TABLES;
# Check Django logs
docker-compose logs -f django
# Access your Django app
# Open browser: http://localhost:8000
Step 6: Useful Docker Commands
# Start services
docker-compose up -d
# Stop services
docker-compose down
# Stop and remove volumes (clean slate)
docker-compose down -v
# View logs
docker-compose logs -f
docker-compose logs cassandra
docker-compose logs django
# Execute commands in containers
docker exec -it cassandra_db cqlsh
docker exec -it django_app python manage.py shell
# Restart specific service
docker-compose restart django
# Scale Cassandra (multi-node cluster)
# NOTE: this fails with the docker-compose.yml above as written, because the
# cassandra service has a fixed container_name (cassandra_db) and a fixed
# host port (9042), and each can only be used by one container. Remove both
# from the service definition before scaling.
docker-compose up -d --scale cassandra=3
Step 7: Production Docker Setup
For production, create docker-compose.prod.yml:
version: '3.8'

services:
  cassandra:
    image: cassandra:4.1
    container_name: cassandra_prod
    ports:
      - "9042:9042"
    environment:
      - CASSANDRA_CLUSTER_NAME=ProdCluster
      # JVM heap settings; keep the heap well below the 8G container limit
      # set under deploy.resources.
      - MAX_HEAP_SIZE=4G
      - HEAP_NEWSIZE=800M
    volumes:
      - cassandra_prod_data:/var/lib/cassandra
    deploy:
      resources:
        limits:
          memory: 8G
          cpus: '4'
    networks:
      - prod_network

  django:
    build: .
    container_name: django_prod
    # NOTE: gunicorn must be installed in the image; it is not in the
    # requirements.txt shown earlier, so add it there.
    # NOTE: this command replaces the Dockerfile CMD, so
    # `python manage.py sync_cassandra` is not run automatically here.
    command: gunicorn myproject.wsgi:application --bind 0.0.0.0:8000 --workers 4
    volumes:
      - static_volume:/app/staticfiles
    # Reachable only by other services on prod_network (nginx), not
    # published on the host.
    expose:
      - 8000
    # NOTE: unlike the development file, this only orders container startup;
    # there is no healthcheck/condition, so Django may start before
    # Cassandra accepts connections.
    depends_on:
      - cassandra
    environment:
      - DEBUG=False
      - CASSANDRA_HOST=cassandra
    networks:
      - prod_network

  nginx:
    image: nginx:alpine
    container_name: nginx_prod
    volumes:
      # NOTE: ./nginx.conf must exist next to this file; it is not shown in
      # this guide.
      - ./nginx.conf:/etc/nginx/nginx.conf
      - static_volume:/app/staticfiles
    ports:
      - "80:80"
    depends_on:
      - django
    networks:
      - prod_network

volumes:
  cassandra_prod_data:
  static_volume:

networks:
  prod_network:
    driver: bridge
Comparison: Docker vs Without Docker
Without Docker
Pros:
- ✅ Direct control over installation
- ✅ Easier debugging initially
- ✅ No containerization overhead
- ✅ Good for learning Cassandra
Cons:
- ❌ Manual installation and configuration
- ❌ Environment inconsistencies
- ❌ Harder to replicate on other machines
- ❌ Cleanup is messy
With Docker
Pros:
- ✅ Consistent environment everywhere
- ✅ Easy setup and teardown
- ✅ Perfect for team collaboration
- ✅ Production-ready configuration
- ✅ Easy to scale
Cons:
- ❌ Learning curve for Docker
- ❌ Resource overhead
- ❌ Network complexity
Best Practices
1. Connection Pooling
# In settings.py
'OPTIONS': {
'connection': {
'consistency': 'ONE',
'connections_per_host': 5,
'protocol_version': 4,
}
}
2. Proper Data Modeling
# Good: Partition by a natural key and order rows within it by time, so
# "latest activities of user X" is a single-partition query.
class UserActivity(DjangoCassandraModel):
    user_id = columns.UUID(primary_key=True)
    timestamp = columns.DateTime(primary_key=True, clustering_order="DESC")


# Bad: A lone surrogate UUID key puts every row in its own partition, so
# rows can only be fetched one ID at a time. There are no per-user or
# time-range queries without a full table scan.
class BadModel(DjangoCassandraModel):
    id = columns.UUID(primary_key=True)  # One row per partition; nothing to range over
3. Query Optimization
# Good: Query by partition key
activities = UserActivity.objects.filter(user_id=user_id).limit(100)
# Bad: Full table scan
all_activities = UserActivity.objects.all() # NEVER do this!
4. Error Handling
from cassandra.cluster import NoHostAvailable
from django.core.exceptions import ObjectDoesNotExist
try:
activity = UserActivity.objects.get(user_id=user_id)
except ObjectDoesNotExist:
return JsonResponse({'error': 'Not found'}, status=404)
except NoHostAvailable:
return JsonResponse({'error': 'Database unavailable'}, status=503)
Troubleshooting
Issue: Cassandra won't start
# Check logs
docker-compose logs cassandra
# Increase memory in docker-compose.yml
environment:
- MAX_HEAP_SIZE=2G
Issue: Django can't connect
# Add connection retry logic
'OPTIONS': {
'connection': {
'retry_connect': True,
'connect_timeout': 30,
}
}
Issue: Slow queries
# Check with TRACING in cqlsh
TRACING ON;
SELECT * FROM user_activity WHERE user_id = ...;
Conclusion
You now have two ways to integrate Cassandra with Django:
- Without Docker: Better for learning and local development
- With Docker: Better for teams and production
Choose based on your needs. Start simple, scale when necessary.
Remember: Cassandra is powerful but complex. Use it for the right use cases—high-write scenarios, time-series data, and distributed systems.
Questions about Cassandra + Django? Drop them in the comments!
Top comments (0)