I've seen this in every codebase that deals with social media data:
// Please don't do this
// Substring checks plus chained splits only work for the one URL shape they
// were written against: a TikTok URL without "@" yields undefined, and any
// host that merely contains the domain text is accepted.
if (url.includes('tiktok.com')) {
// NOTE: `username` is block-scoped (const), so it is not visible after this if/else.
const username = url.split('@')[1]?.split('/')[0]?.split('?')[0];
} else if (url.includes('instagram.com')) {
const username = url.split('instagram.com/')[1]?.split('/')[0]?.split('?')[0];
}
Fragile. Breaks on edge cases. Doesn't handle mobile URLs, short links, embedded URLs, or the 15 different formats each platform uses.
I wrote a proper parser once and never had to touch URL parsing code again. Here it is.
The Problem
Social media URLs are chaos. For a single TikTok video, users might paste any of these:
https://www.tiktok.com/@username/video/7234567890
https://tiktok.com/@username/video/7234567890
https://vm.tiktok.com/ZMrKxYz/ (short link)
https://www.tiktok.com/t/ZMrKxYz/ (another short format)
tiktok.com/@username (no protocol)
@username (just the handle)
For Instagram, you have:
https://www.instagram.com/username/
https://instagram.com/p/CxYz123/
https://www.instagram.com/reel/CxYz123/
https://www.instagram.com/stories/username/12345/
https://instagr.am/p/CxYz123/ (short domain)
A real parser needs to handle all of these without breaking.
The Parser
// social-url-parser.js
/**
 * Per-platform URL recognition table.
 *
 * Each platform entry has:
 *  - `domains`:  host names that identify the platform.
 *  - `patterns`: ordered list of `{ regex, extract }` pairs. Patterns are meant
 *                to be tried in order, most specific first; `extract` receives
 *                the regex match and returns the structured fields, or `null`
 *                to reject the match (used for reserved path segments).
 */
const PLATFORM_PATTERNS = {
  tiktok: {
    domains: ['tiktok.com', 'vm.tiktok.com'],
    patterns: [
      // Video: tiktok.com/@user/video/12345
      {
        regex: /tiktok\.com\/@([^/?#]+)\/video\/(\d+)/i,
        extract: (m) => ({ type: 'video', username: m[1], videoId: m[2] }),
      },
      // Profile: tiktok.com/@user
      {
        regex: /tiktok\.com\/@([^/?#]+)/i,
        extract: (m) => ({ type: 'profile', username: m[1] }),
      },
      // Short link: vm.tiktok.com/ZMrKxYz or tiktok.com/t/ZMrKxYz
      {
        regex: /(?:vm\.tiktok\.com|tiktok\.com\/t)\/([A-Za-z0-9]+)/i,
        extract: (m) => ({ type: 'shortlink', shortCode: m[1] }),
      },
      // Music: tiktok.com/music/song-name-12345
      {
        regex: /tiktok\.com\/music\/([^/?#]+)-(\d+)/i,
        extract: (m) => ({ type: 'music', slug: m[1], musicId: m[2] }),
      },
      // Tag: tiktok.com/tag/hashtag
      {
        regex: /tiktok\.com\/tag\/([^/?#]+)/i,
        extract: (m) => ({ type: 'hashtag', tag: m[1] }),
      },
    ],
  },
  instagram: {
    domains: ['instagram.com', 'instagr.am'],
    // Every pattern accepts both hosts; matching only `instagram.com` would
    // make the `instagr.am` entry in `domains` unreachable.
    patterns: [
      // Post: instagram.com/p/CODE
      {
        regex: /(?:instagram\.com|instagr\.am)\/p\/([A-Za-z0-9_-]+)/i,
        extract: (m) => ({ type: 'post', shortcode: m[1] }),
      },
      // Reel: instagram.com/reel/CODE
      {
        regex: /(?:instagram\.com|instagr\.am)\/reel\/([A-Za-z0-9_-]+)/i,
        extract: (m) => ({ type: 'reel', shortcode: m[1] }),
      },
      // Story: instagram.com/stories/username/12345
      {
        regex: /(?:instagram\.com|instagr\.am)\/stories\/([^/?#]+)\/(\d+)/i,
        extract: (m) => ({ type: 'story', username: m[1], storyId: m[2] }),
      },
      // Profile: instagram.com/username (must come last — it's the catch-all)
      {
        regex: /(?:instagram\.com|instagr\.am)\/([A-Za-z0-9_.]+)\/?(?:\?|#|$)/i,
        extract: (m) => {
          // First path segments that are site sections, not user names.
          const reserved = ['p', 'reel', 'stories', 'explore', 'accounts', 'direct', 'reels'];
          if (reserved.includes(m[1].toLowerCase())) return null;
          return { type: 'profile', username: m[1] };
        },
      },
    ],
  },
  youtube: {
    domains: ['youtube.com', 'youtu.be'],
    patterns: [
      // Video: youtube.com/watch?v=ID or youtu.be/ID.
      // `v` may follow other query parameters (watch?feature=share&v=ID).
      {
        regex: /(?:youtube\.com\/watch\?(?:[^#]*?&)?v=|youtu\.be\/)([A-Za-z0-9_-]{11})/i,
        extract: (m) => ({ type: 'video', videoId: m[1] }),
      },
      // Shorts: youtube.com/shorts/ID
      {
        regex: /youtube\.com\/shorts\/([A-Za-z0-9_-]{11})/i,
        extract: (m) => ({ type: 'short', videoId: m[1] }),
      },
      // Channel: youtube.com/@handle or /channel/ID
      {
        regex: /youtube\.com\/@([^/?#]+)/i,
        extract: (m) => ({ type: 'channel', handle: m[1] }),
      },
      {
        regex: /youtube\.com\/channel\/([A-Za-z0-9_-]+)/i,
        extract: (m) => ({ type: 'channel', channelId: m[1] }),
      },
    ],
  },
  twitter: {
    domains: ['twitter.com', 'x.com'],
    // The lookbehind pins the host to a label boundary: "x.com" is a suffix of
    // unrelated hosts such as netflix.com or xbox.com, which must not match.
    patterns: [
      // Tweet: twitter.com/user/status/12345
      {
        regex: /(?<![\w-])(?:twitter|x)\.com\/([^/?#]+)\/status\/(\d+)/i,
        extract: (m) => ({ type: 'tweet', username: m[1], tweetId: m[2] }),
      },
      // Profile: twitter.com/user
      {
        regex: /(?<![\w-])(?:twitter|x)\.com\/([A-Za-z0-9_]+)\/?(?:\?|#|$)/i,
        extract: (m) => {
          // First path segments that are site sections, not user names.
          const reserved = ['home', 'explore', 'notifications', 'messages', 'settings', 'i', 'search'];
          if (reserved.includes(m[1].toLowerCase())) return null;
          return { type: 'profile', username: m[1] };
        },
      },
    ],
  },
  linkedin: {
    domains: ['linkedin.com'],
    patterns: [
      // Post: linkedin.com/posts/user_activity-id
      {
        regex: /linkedin\.com\/posts\/([^/?#]+)/i,
        extract: (m) => ({ type: 'post', slug: m[1] }),
      },
      // Profile: linkedin.com/in/username
      {
        regex: /linkedin\.com\/in\/([^/?#]+)/i,
        extract: (m) => ({ type: 'profile', username: m[1] }),
      },
      // Company: linkedin.com/company/name
      {
        regex: /linkedin\.com\/company\/([^/?#]+)/i,
        extract: (m) => ({ type: 'company', slug: m[1] }),
      },
    ],
  },
};
/**
 * Interpret the input as a bare handle ("@name" or "name").
 *
 * @param {string} input - Raw text; surrounding whitespace and one leading "@" are ignored.
 * @returns {{platform: null, type: 'handle', username: string} | null}
 *   The handle descriptor, or null when the text is empty, longer than 30
 *   characters, or contains anything other than letters, digits, "_" and ".".
 */
function parseHandle(input) {
  const candidate = input.trim().replace(/^@/, '');
  const fitsLength = candidate.length <= 30;
  const hasOnlyHandleChars = /^[A-Za-z0-9_.]+$/.test(candidate);
  return fitsLength && hasOnlyHandleChars
    ? { platform: null, type: 'handle', username: candidate }
    : null;
}
/**
 * Parse a social media URL (or bare handle) into a structured descriptor.
 *
 * @param {string} input - A full URL, a URL without a scheme, or a bare handle.
 * @returns {object|null}
 *   - For a bare handle: whatever `parseHandle` returns.
 *   - For a recognised URL: `{ platform, ...fields, originalUrl }`, where
 *     `fields` come from the first matching pattern's `extract` and
 *     `originalUrl` is the trimmed input.
 *   - `null` for non-string/empty input, input that cannot be parsed as a URL,
 *     hosts that belong to no known platform, or paths no pattern accepts.
 */
function parseSocialUrl(input) {
  if (!input || typeof input !== 'string') return null;
  const trimmed = input.trim();

  // Without a slash or a dot the text cannot be a URL, so treat it as a handle.
  if (!trimmed.includes('/') && !trimmed.includes('.')) {
    return parseHandle(trimmed);
  }

  // Normalize: add a scheme when missing (case-insensitive, any scheme).
  const hasScheme = /^[a-z][a-z\d+.-]*:\/\//i.test(trimmed);
  const url = hasScheme ? trimmed : `https://${trimmed}`;

  // Decide the platform from the real host name. A substring test on the whole
  // URL would accept "netflix.com" for "x.com", or a domain that only appears
  // in the query string.
  let hostname;
  try {
    ({ hostname } = new URL(url));
  } catch {
    return null; // Not parseable as a URL
  }

  for (const [platform, config] of Object.entries(PLATFORM_PATTERNS)) {
    const matchesDomain = config.domains.some(
      (domain) => hostname === domain || hostname.endsWith(`.${domain}`),
    );
    if (!matchesDomain) continue;

    // First pattern whose extractor accepts the match wins.
    for (const pattern of config.patterns) {
      const match = url.match(pattern.regex);
      if (!match) continue;
      const extracted = pattern.extract(match);
      if (extracted) {
        return { platform, ...extracted, originalUrl: trimmed };
      }
    }
  }
  return null; // Unrecognized
}
module.exports = { parseSocialUrl };
Usage
const { parseSocialUrl } = require('./social-url-parser');
// NOTE: every URL result also carries `originalUrl` (the trimmed input string);
// it is left out of the result comments below for brevity. Bare-handle results
// do not have that field.
// TikTok
parseSocialUrl('https://www.tiktok.com/@charlidamelio/video/7234567890');
// { platform: 'tiktok', type: 'video', username: 'charlidamelio', videoId: '7234567890' }
parseSocialUrl('https://vm.tiktok.com/ZMrKxYz/');
// { platform: 'tiktok', type: 'shortlink', shortCode: 'ZMrKxYz' }
parseSocialUrl('tiktok.com/@charlidamelio');
// { platform: 'tiktok', type: 'profile', username: 'charlidamelio' }
// Instagram
parseSocialUrl('https://instagram.com/p/CxYz123/');
// { platform: 'instagram', type: 'post', shortcode: 'CxYz123' }
parseSocialUrl('instagram.com/therock');
// { platform: 'instagram', type: 'profile', username: 'therock' }
// YouTube
parseSocialUrl('https://youtu.be/dQw4w9WgXcQ');
// { platform: 'youtube', type: 'video', videoId: 'dQw4w9WgXcQ' }
// Twitter/X — handles both domains; the platform is reported as 'twitter' either way
parseSocialUrl('https://x.com/elonmusk/status/1234567890');
// { platform: 'twitter', type: 'tweet', username: 'elonmusk', tweetId: '1234567890' }
// Bare handle — the platform cannot be known, so it is null
parseSocialUrl('@charlidamelio');
// { platform: null, type: 'handle', username: 'charlidamelio' }
Resolving Short Links
TikTok short links (vm.tiktok.com/xxx) redirect to the full URL. Here's how to resolve them:
const axios = require('axios');
/**
 * Follow a short link's redirects and parse the URL it ends up at.
 *
 * @param {string} shortUrl - Absolute short-link URL (e.g. a vm.tiktok.com link).
 * @param {{ timeoutMs?: number }} [options]
 * @param {number} [options.timeoutMs=10000] - Request timeout in milliseconds,
 *   so a stalled server cannot hang the caller indefinitely.
 * @returns {Promise<object|null>} The result of `parseSocialUrl` on the final
 *   URL, or null when the request fails (network error, timeout, too many
 *   redirects) or the final URL is not recognised.
 */
async function resolveShortLink(shortUrl, { timeoutMs = 10000 } = {}) {
  try {
    // Follow redirects and grab the final URL
    const response = await axios.get(shortUrl, {
      maxRedirects: 5,
      timeout: timeoutMs,
      // Accept any status: only the redirect target matters, not the body.
      validateStatus: () => true,
      headers: { 'User-Agent': 'Mozilla/5.0' },
    });

    const followedUrl = response.request?.res?.responseUrl;
    if (followedUrl) return parseSocialUrl(followedUrl);

    // Fallback: an unfollowed redirect. Location may be relative, so resolve
    // it against the short URL before parsing.
    const location = response.headers?.location;
    const finalUrl = location ? new URL(location, shortUrl).href : shortUrl;
    return parseSocialUrl(finalUrl);
  } catch {
    // Best-effort helper: any failure is reported as "could not resolve".
    return null;
  }
}
// Usage
// `result` is null when the request fails or the final URL is not recognised.
const result = await resolveShortLink('https://vm.tiktok.com/ZMrKxYz/');
// Otherwise it is the parsed final URL — for a video link, the full username + video ID
Integrating with SociaVault API
Once you've parsed the URL, fetching the actual data is one API call:
const axios = require('axios');
const { parseSocialUrl } = require('./social-url-parser');
// Shared axios client for the scrape endpoints. The API key is read from the
// SOCIAVAULT_API_KEY environment variable and sent as the `x-api-key` header.
const api = axios.create({
baseURL: 'https://api.sociavault.com/v1/scrape',
headers: { 'x-api-key': process.env.SOCIAVAULT_API_KEY },
});
/**
 * Parse a pasted URL and request the matching data through the `api` client.
 *
 * @param {string} input - URL (or handle) as pasted by the user.
 * @returns {Promise<object>} The promise returned by `api.get` for the matching endpoint.
 * @throws {Error} 'Unrecognized URL format' when the input cannot be parsed;
 *   'Unsupported: <platform>:<type>' when no endpoint is mapped for the result.
 *   (The function is async, so both surface as promise rejections.)
 */
async function getDataFromUrl(input) {
  const parsed = parseSocialUrl(input);
  if (!parsed) throw new Error('Unrecognized URL format');

  // Values go through `params` so the client encodes them; interpolating them
  // into the path would let a value containing "&" or "=" inject parameters.
  switch (`${parsed.platform}:${parsed.type}`) {
    case 'tiktok:profile':
      return api.get('/tiktok/profile', { params: { username: parsed.username } });
    case 'tiktok:video':
      return api.get('/tiktok/video-info', { params: { videoId: parsed.videoId } });
    case 'instagram:profile':
      return api.get('/instagram/profile', { params: { username: parsed.username } });
    case 'instagram:post':
    case 'instagram:reel':
      // Posts and reels share one endpoint, keyed by shortcode.
      return api.get('/instagram/post-info', { params: { shortcode: parsed.shortcode } });
    default:
      throw new Error(`Unsupported: ${parsed.platform}:${parsed.type}`);
  }
}
// User pastes any URL → you get structured data
// Resolves to whatever `api.get` returns; rejects when the input is not
// recognised or its platform/type pair has no case in getDataFromUrl.
const data = await getDataFromUrl('https://www.tiktok.com/@charlidamelio');
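Your users paste whatever garbage URL format they have. You parse it, identify the platform and content type, and fetch the right data. "Please enter a valid URL" becomes a rare error, reserved for input that is genuinely unrecognizable (getDataFromUrl still throws in that case) rather than for every unusual-but-valid link format.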
Testing
// test.js
const { parseSocialUrl } = require('./social-url-parser');
// Table-driven smoke test: [input, expected platform, expected type].
const testCases = [
  ['https://www.tiktok.com/@user/video/123', 'tiktok', 'video'],
  ['https://vm.tiktok.com/ZMrKxYz/', 'tiktok', 'shortlink'],
  ['tiktok.com/@user', 'tiktok', 'profile'],
  ['https://instagram.com/p/ABC123/', 'instagram', 'post'],
  ['https://www.instagram.com/reel/ABC123/', 'instagram', 'reel'],
  ['instagram.com/username', 'instagram', 'profile'],
  ['https://youtu.be/dQw4w9WgXcQ', 'youtube', 'video'],
  ['https://x.com/user/status/123', 'twitter', 'tweet'],
  ['@username', null, 'handle'],
];

let passed = 0;
for (const [input, expectedPlatform, expectedType] of testCases) {
  const result = parseSocialUrl(input);
  // A null result fails the check: `result?.type` is undefined, never a type string.
  const ok = result?.platform === expectedPlatform && result?.type === expectedType;
  console.log(`${ok ? '✅' : '❌'} ${input} → ${result?.platform}:${result?.type}`);
  if (ok) passed++;
}
console.log(`\n${passed}/${testCases.length} passed`);

// Report failure through the exit code so shells and CI notice a regression;
// printing alone would leave the process exiting with 0.
if (passed !== testCases.length) {
  process.exitCode = 1;
}
npm Package It
This is useful enough to be a standalone package. Throw it on npm:
# Initialise the package under your npm scope (it is later installed as @yourname/social-url-parser)
npm init --scope=@yourname
# Add the parser as index.js
# Publish the package with public access
npm publish --access public
Now every project that deals with social URLs can npm install @yourname/social-url-parser instead of writing their own regex spaghetti.
Read the Full Guide
Universal Social Media URL Parser → SociaVault Blog
Parse social media URLs and fetch the data behind them with SociaVault — one API for TikTok, Instagram, YouTube, Twitter, LinkedIn, Reddit, and more.
Discussion
What's the worst URL parsing code you've seen in a codebase? I once found a 400-line switch statement that handled 3 platforms. Share your horror stories.
Top comments (0)