Every influencer marketing platform charges $300+/month. The core feature? A searchable database of creators.
But here's their secret: it's just a PostgreSQL table with a good search index. The data comes from public APIs. The "AI-powered discovery" is a WHERE clause with some filters.
Let's build the same thing.
What We're Building
A creator database that:
- Ingests creator profiles from Instagram and TikTok
- Stores them in PostgreSQL with full-text search
- Lets you search by niche, follower range, engagement rate, location, and keywords
- Auto-enriches new profiles when they're looked up
- Exposes a simple REST API for querying
This is the backend that powers tools like Modash, Heepsy, and Upfluence. You'll have a working version by the end of this article.
The Stack
- Node.js + Express – API server
- PostgreSQL – storage + full-text search
- Prisma – database ORM
- SociaVault API – fetch creator profiles and posts
- node-cron – background enrichment jobs
Database Schema
// schema.prisma
generator client {
provider = "prisma-client-js"
}
datasource db {
provider = "postgresql"
url = env("DATABASE_URL")
}
model Creator {
id String @id @default(cuid())
platform String // instagram, tiktok
username String
displayName String?
bio String?
followers Int @default(0)
following Int @default(0)
postsCount Int @default(0)
avgLikes Int @default(0)
avgComments Int @default(0)
engagementRate Float @default(0)
profilePicUrl String?
isVerified Boolean @default(false)
category String? // detected niche
country String?
language String?
lastUpdated DateTime @default(now())
createdAt DateTime @default(now())
@@unique([platform, username])
@@index([platform, followers])
@@index([category])
@@index([engagementRate])
}
Run the migration:
npx prisma migrate dev --name init
The Key Trick: Full-Text Search
PostgreSQL has built-in full-text search. No Elasticsearch needed.
-- Add a generated tsvector column for search
ALTER TABLE "Creator" ADD COLUMN search_vector tsvector
GENERATED ALWAYS AS (
setweight(to_tsvector('english', coalesce(username, '')), 'A') ||
setweight(to_tsvector('english', coalesce("displayName", '')), 'A') ||
setweight(to_tsvector('english', coalesce(bio, '')), 'B') ||
setweight(to_tsvector('english', coalesce(category, '')), 'C')
) STORED;
-- Index it
CREATE INDEX creator_search_idx ON "Creator" USING GIN (search_vector);
Username and display name get weight A (highest priority). Bio gets B. Category gets C. When you search "fitness coach LA", it'll match creators whose username contains "fitness", bio mentions "coach", and location includes "LA" — ranked by relevance.
Step 1: The Ingestion Pipeline
// ingest.js
const axios = require('axios');
const { PrismaClient } = require('@prisma/client');
const prisma = new PrismaClient();
const api = axios.create({
baseURL: 'https://api.sociavault.com/v1/scrape',
headers: { 'x-api-key': process.env.SOCIAVAULT_API_KEY },
});
async function ingestCreator(platform, username) {
// Check if we already have fresh data (< 7 days old)
const existing = await prisma.creator.findUnique({
where: { platform_username: { platform, username } },
});
if (existing) {
const daysSinceUpdate = (Date.now() - existing.lastUpdated.getTime()) / 86400000;
if (daysSinceUpdate < 7) return existing; // Still fresh
}
// Fetch profile
const profileEndpoint = platform === 'instagram'
? `/instagram/profile?username=${username}`
: `/tiktok/profile?username=${username}`;
const { data: profileRes } = await api.get(profileEndpoint);
const profile = profileRes.data || profileRes;
// Fetch recent posts for engagement calculation
const postsEndpoint = platform === 'instagram'
? `/instagram/posts?username=${username}&limit=12`
: `/tiktok/profile-videos?username=${username}&limit=12`;
const { data: postsRes } = await api.get(postsEndpoint);
const posts = postsRes.data || postsRes.posts || [];
// Calculate engagement metrics
const totalLikes = posts.reduce((s, p) => s + (p.likesCount || p.diggCount || 0), 0);
const totalComments = posts.reduce((s, p) => s + (p.commentsCount || p.commentCount || 0), 0);
const avgLikes = posts.length > 0 ? Math.round(totalLikes / posts.length) : 0;
const avgComments = posts.length > 0 ? Math.round(totalComments / posts.length) : 0;
const followers = profile.followersCount || profile.followerCount || 0;
const engagementRate = followers > 0
? parseFloat((((avgLikes + avgComments) / followers) * 100).toFixed(2))
: 0;
// Detect category from bio (simple keyword matching)
const category = detectCategory(profile.bio || profile.signature || '');
// Upsert into database
return prisma.creator.upsert({
where: { platform_username: { platform, username } },
create: {
platform,
username,
displayName: profile.fullName || profile.nickname || username,
bio: profile.bio || profile.signature || '',
followers,
following: profile.followingCount || 0,
postsCount: profile.postsCount || profile.videoCount || 0,
avgLikes,
avgComments,
engagementRate,
profilePicUrl: profile.profilePicUrl || profile.avatarUrl || null,
isVerified: profile.isVerified || false,
category,
},
update: {
displayName: profile.fullName || profile.nickname || username,
bio: profile.bio || profile.signature || '',
followers,
following: profile.followingCount || 0,
postsCount: profile.postsCount || profile.videoCount || 0,
avgLikes,
avgComments,
engagementRate,
profilePicUrl: profile.profilePicUrl || profile.avatarUrl || null,
isVerified: profile.isVerified || false,
category,
lastUpdated: new Date(),
},
});
}
function detectCategory(bio) {
const lower = bio.toLowerCase();
const categories = {
'fitness': ['fitness', 'gym', 'workout', 'personal trainer', 'health coach'],
'beauty': ['beauty', 'makeup', 'skincare', 'cosmetics', 'hair'],
'food': ['food', 'recipe', 'cooking', 'chef', 'restaurant', 'baking'],
'travel': ['travel', 'wanderlust', 'adventure', 'explore', 'backpack'],
'tech': ['tech', 'developer', 'coding', 'software', 'startup', 'saas'],
'fashion': ['fashion', 'style', 'outfit', 'clothing', 'designer'],
'gaming': ['gaming', 'gamer', 'twitch', 'esports', 'streamer'],
'music': ['music', 'musician', 'singer', 'producer', 'dj'],
'education': ['teacher', 'education', 'learn', 'tutor', 'professor'],
'business': ['entrepreneur', 'ceo', 'founder', 'business', 'marketing'],
};
for (const [category, keywords] of Object.entries(categories)) {
if (keywords.some(kw => lower.includes(kw))) return category;
}
return 'other';
}
module.exports = { ingestCreator };
Step 2: The Search API
// server.js
const express = require('express');
const { PrismaClient } = require('@prisma/client');
const { ingestCreator } = require('./ingest');
const app = express();
const prisma = new PrismaClient();
// Search creators
app.get('/api/creators/search', async (req, res) => {
const {
q, // text search query
platform, // instagram or tiktok
category, // fitness, beauty, tech, etc.
minFollowers,
maxFollowers,
minEngagement,
sort = 'followers',
order = 'desc',
page = 1,
limit = 20,
} = req.query;
const where = {};
const AND = [];
if (platform) AND.push({ platform });
if (category) AND.push({ category });
if (minFollowers) AND.push({ followers: { gte: parseInt(minFollowers) } });
if (maxFollowers) AND.push({ followers: { lte: parseInt(maxFollowers) } });
if (minEngagement) AND.push({ engagementRate: { gte: parseFloat(minEngagement) } });
if (AND.length > 0) where.AND = AND;
// Full-text search using raw SQL for the tsvector
let creators;
const offset = (parseInt(page) - 1) * parseInt(limit);
if (q) {
// Use PostgreSQL full-text search
const searchQuery = q.split(' ').join(' & '); // AND between terms
creators = await prisma.$queryRaw`
SELECT *, ts_rank(search_vector, to_tsquery('english', ${searchQuery})) as rank
FROM "Creator"
WHERE search_vector @@ to_tsquery('english', ${searchQuery})
${platform ? prisma.$queryRaw`AND platform = ${platform}` : prisma.$queryRaw``}
${category ? prisma.$queryRaw`AND category = ${category}` : prisma.$queryRaw``}
${minFollowers ? prisma.$queryRaw`AND followers >= ${parseInt(minFollowers)}` : prisma.$queryRaw``}
${maxFollowers ? prisma.$queryRaw`AND followers <= ${parseInt(maxFollowers)}` : prisma.$queryRaw``}
ORDER BY rank DESC
LIMIT ${parseInt(limit)} OFFSET ${offset}
`;
} else {
creators = await prisma.creator.findMany({
where,
orderBy: { [sort]: order },
skip: offset,
take: parseInt(limit),
});
}
const total = await prisma.creator.count({ where: q ? undefined : where });
res.json({
creators,
pagination: {
page: parseInt(page),
limit: parseInt(limit),
total,
pages: Math.ceil(total / parseInt(limit)),
},
});
});
// Lookup + auto-ingest a specific creator
app.get('/api/creators/:platform/:username', async (req, res) => {
const { platform, username } = req.params;
try {
const creator = await ingestCreator(platform, username);
res.json(creator);
} catch (err) {
res.status(500).json({ error: err.message });
}
});
app.listen(3000, () => console.log('Creator DB API running on :3000'));
Step 3: Seed the Database
You need initial data. Here's a script that ingests a list of known creators:
// seed.js
const { ingestCreator } = require('./ingest');
const SEED_CREATORS = [
{ platform: 'instagram', username: 'therock' },
{ platform: 'instagram', username: 'cristiano' },
{ platform: 'tiktok', username: 'charlidamelio' },
{ platform: 'tiktok', username: 'khaby.lame' },
// Add more from your niche...
];
async function seed() {
for (const creator of SEED_CREATORS) {
try {
console.log(`Ingesting ${creator.platform}/@${creator.username}...`);
await ingestCreator(creator.platform, creator.username);
await new Promise(r => setTimeout(r, 1000)); // Rate limit
} catch (err) {
console.error(`Failed: ${creator.username} — ${err.message}`);
}
}
console.log('Seeding complete.');
}
seed();
Usage Examples
# Search for fitness creators on Instagram with 10K-100K followers
curl "localhost:3000/api/creators/search?q=fitness&platform=instagram&minFollowers=10000&maxFollowers=100000"
# Find beauty creators with 3%+ engagement rate
curl "localhost:3000/api/creators/search?category=beauty&minEngagement=3&sort=engagementRate"
# Look up a specific creator (auto-fetches if not in DB)
curl "localhost:3000/api/creators/instagram/some_creator"
Why This Beats Spreadsheets
The moment you have 500+ creators in the database, spreadsheets become useless. You can't full-text search a CSV. You can't filter by engagement rate AND follower count AND niche in Google Sheets without losing your mind.
With this setup, queries like "find me fitness creators on TikTok with 50K-200K followers and 4%+ engagement" take 3 milliseconds.
What I'd Add Next
- Background refresh job — cron that re-fetches the oldest profiles daily
- Duplicate detection — same creator on Instagram and TikTok, link them
- Export to CSV — for people who still need spreadsheets
- Simple frontend — a search bar + filter sidebar + results grid
Read the Full Guide
Build a Creator Database → SociaVault Blog
Build influencer databases with SociaVault — one API for profiles, posts, and engagement data across TikTok, Instagram, YouTube, and 10+ platforms.
Discussion
If you've built a creator/influencer database before, what was your biggest pain point? Schema design? Keeping data fresh? Handling duplicates across platforms?
Top comments (0)