Building a Social Media Cross-Poster Bot with Web Scraping
Manually posting the same content to Twitter/X, LinkedIn, Bluesky, and Mastodon is tedious. Let's build an automated cross-poster that adapts the content to each platform's format and character limits.
Architecture
- Content source: Your blog RSS feed, a Google Sheet, or manual input
- Adapter layer: Reformats content per platform (character limits, hashtags, mentions)
- Poster layer: API clients for each platform
- Scheduler: Optimal posting times per platform
Setup
pip install requests beautifulsoup4 feedparser tweepy atproto schedule
Content Source: RSS Feed Scraper
import feedparser
import requests
from bs4 import BeautifulSoup
from datetime import datetime
class ContentSource:
    """Fetches post candidates from an RSS feed or from manually supplied text.

    Every method returns a list of post dicts with the keys:
    ``title``, ``url``, ``summary``, ``published`` (ISO string), ``tags``.
    """

    def __init__(self):
        # Shared HTTP session, reused for any future fetches.
        self.session = requests.Session()

    def from_rss(self, feed_url, limit=5):
        """Parse *feed_url* and return up to *limit* normalized post dicts."""
        parsed = feedparser.parse(feed_url)
        results = []
        for item in parsed.entries[:limit]:
            # Feeds expose the body as either "summary" or "description".
            raw_html = item.get("summary", item.get("description", ""))
            # Strip markup and cap the plain text at 500 characters.
            stripped = BeautifulSoup(raw_html, "html.parser").get_text()[:500]
            results.append({
                "title": item.title,
                "url": item.link,
                "summary": stripped,
                "published": item.get("published", datetime.now().isoformat()),
                "tags": [t.term for t in item.get("tags", [])],
            })
        return results

    def from_manual(self, text, url=None, tags=None):
        """Wrap manually entered *text* in the same post-dict shape as from_rss."""
        entry = {
            "title": text[:100],
            "url": url or "",
            "summary": text,
            "published": datetime.now().isoformat(),
            "tags": tags or [],
        }
        return [entry]
Platform Adapters
class ContentAdapter:
    """Reformats a post dict for each platform's character limit."""

    # Hard character caps per platform.
    LIMITS = {
        "twitter": 280,
        "bluesky": 300,
        "linkedin": 3000,
        "mastodon": 500
    }

    def adapt(self, post, platform):
        """Dispatch *post* to the formatter for *platform*.

        Returns the formatted text, or None for an unrecognized platform
        (callers register only the known platform names).
        """
        limit = self.LIMITS.get(platform, 280)
        if platform == "twitter":
            return self.format_twitter(post, limit)
        elif platform == "bluesky":
            return self.format_bluesky(post, limit)
        elif platform == "linkedin":
            return self.format_linkedin(post, limit)
        elif platform == "mastodon":
            return self.format_mastodon(post, limit)

    def format_twitter(self, post, limit):
        """Summary + URL + up to 3 hashtags, summary truncated to fit *limit*."""
        hashtags = " ".join(f"#{t}" for t in post["tags"][:3])
        url = post["url"]
        # Reserve 4 chars for the "\n\n" and "\n" separators (1 char spare).
        available = limit - len(url) - len(hashtags) - 4
        text = post["summary"][:available]
        return f"{text}\n\n{url}\n{hashtags}"

    def format_bluesky(self, post, limit):
        """Summary with the URL on a trailing line, truncated to fit *limit*."""
        text = post["summary"][:limit - len(post["url"]) - 2]
        return f"{text}\n{post['url']}"

    def format_linkedin(self, post, limit):
        """Title, summary, URL and up to 5 hashtags, truncated to fit *limit*.

        BUG FIX: the summary is now truncated so the whole post respects the
        platform cap; previously *limit* was ignored entirely, so a long
        summary could exceed LinkedIn's 3000-character maximum.
        """
        hashtags = " ".join(f"#{t}" for t in post["tags"][:5])
        # Fixed overhead: title, url, hashtags, and three "\n\n" separators.
        overhead = len(post["title"]) + len(post["url"]) + len(hashtags) + 6
        summary = post["summary"][:max(limit - overhead, 0)]
        return f"{post['title']}\n\n{summary}\n\n{post['url']}\n\n{hashtags}"

    def format_mastodon(self, post, limit):
        """Summary + URL + up to 4 hashtags, summary truncated to fit *limit*."""
        hashtags = " ".join(f"#{t}" for t in post["tags"][:4])
        text = post["summary"][:limit - len(post["url"]) - len(hashtags) - 4]
        return f"{text}\n\n{post['url']}\n{hashtags}"
Platform Posters
class TwitterPoster:
    """Posts tweets through the Twitter/X v2 API using tweepy."""

    def __init__(self, api_key, api_secret, access_token, access_secret):
        # Lazy import so the bot runs without tweepy when Twitter is unused.
        import tweepy
        # BUG FIX: removed the dead v1.1 OAuthHandler/set_access_token dance —
        # the auth object was created but never used; tweepy.Client performs
        # OAuth 1.0a user-context auth from these four credentials itself.
        self.client = tweepy.Client(
            consumer_key=api_key,
            consumer_secret=api_secret,
            access_token=access_token,
            access_token_secret=access_secret
        )

    def post(self, text):
        """Create a tweet with *text* and return its permalink."""
        result = self.client.create_tweet(text=text)
        tweet_id = result.data["id"]
        return f"https://twitter.com/i/status/{tweet_id}"
class BlueskyPoster:
    """Posts to Bluesky via the AT Protocol Python client."""

    def __init__(self, handle, password):
        # Lazy import so the bot runs without atproto when Bluesky is unused.
        # NOTE(review): prefer an app password over the account password here.
        from atproto import Client
        self.client = Client()
        self.client.login(handle, password)

    def post(self, text):
        """Publish *text* and return the public bsky.app permalink.

        BUG FIX: the original called ``response.uri.split(/)`` — a syntax
        error. The AT-URI must be split on the string "/" to extract the
        record key used in the web permalink.
        """
        response = self.client.send_post(text=text)
        rkey = response.uri.split("/")[-1]
        return f"https://bsky.app/profile/{self.client.me.handle}/post/{rkey}"
class LinkedInPoster:
    """Publishes text shares through the LinkedIn UGC Posts API."""

    def __init__(self, access_token):
        self.token = access_token
        self.headers = {
            "Authorization": f"Bearer {self.token}",
            "Content-Type": "application/json"
        }

    def post(self, text):
        """Share *text* publicly on the authenticated member's feed.

        Returns the JSON body of the API response as a dict.
        """
        # Resolve the member URN before building the share payload.
        profile = requests.get(
            "https://api.linkedin.com/v2/userinfo",
            headers=self.headers
        ).json()
        author_urn = f"urn:li:person:{profile['sub']}"
        share_content = {
            "shareCommentary": {"text": text},
            "shareMediaCategory": "NONE"
        }
        payload = {
            "author": author_urn,
            "lifecycleState": "PUBLISHED",
            "specificContent": {"com.linkedin.ugc.ShareContent": share_content},
            "visibility": {
                "com.linkedin.ugc.MemberNetworkVisibility": "PUBLIC"
            }
        }
        reply = requests.post(
            "https://api.linkedin.com/v2/ugcPosts",
            headers=self.headers,
            json=payload
        )
        return reply.json()
class MastodonPoster:
    """Posts statuses to any Mastodon instance via its REST API."""

    def __init__(self, instance_url, access_token):
        self.base_url = instance_url
        self.headers = {"Authorization": f"Bearer {access_token}"}

    def post(self, text):
        """Publish *text* as a status; return its public URL ('' if absent)."""
        endpoint = f"{self.base_url}/api/v1/statuses"
        reply = requests.post(endpoint, headers=self.headers, json={"status": text})
        return reply.json().get("url", "")
The Cross-Poster Bot
import time
import schedule
class CrossPoster:
    """Fans a single post out to every registered platform poster."""

    def __init__(self):
        self.source = ContentSource()
        self.adapter = ContentAdapter()
        self.platforms = {}   # name -> poster object exposing .post(text)
        self.posted = set()   # ids of posts already published (in-memory only)

    def add_platform(self, name, poster):
        """Register *poster* (any object with a .post(text) method) as *name*."""
        self.platforms[name] = poster

    def cross_post(self, post):
        """Adapt and publish *post* to every registered platform.

        Returns a dict mapping platform name to a status record, or None if
        the post was already published (deduped on its URL, falling back to
        the title).

        BUG FIX: the post id is now only remembered as published when at
        least one platform succeeded; previously a post whose every delivery
        failed was still marked posted and could never be retried.
        """
        post_id = post["url"] or post["title"]
        if post_id in self.posted:
            return
        results = {}
        any_success = False
        for platform_name, poster in self.platforms.items():
            try:
                content = self.adapter.adapt(post, platform_name)
                url = poster.post(content)
                results[platform_name] = {"status": "success", "url": url}
                any_success = True
                print(f"Posted to {platform_name}: {url}")
                time.sleep(5)  # Stagger posts so platforms don't see a burst
            except Exception as e:
                # Best-effort fan-out: record the failure and keep going.
                results[platform_name] = {"status": "error", "error": str(e)}
                print(f"Failed on {platform_name}: {e}")
        if any_success:
            self.posted.add(post_id)
        return results

    def auto_post_from_rss(self, feed_url):
        """Cross-post the newest entry of *feed_url* (deduped via self.posted)."""
        posts = self.source.from_rss(feed_url, limit=1)
        for post in posts:
            self.cross_post(post)
# Setup
# NOTE(review): credentials are hard-coded placeholders — load them from
# environment variables or a secrets manager in real use.
bot = CrossPoster()
bot.add_platform("bluesky", BlueskyPoster("you.bsky.social", "password"))
bot.add_platform("mastodon", MastodonPoster("https://mastodon.social", "token"))
# Post manually
bot.cross_post({
"title": "New blog post",
"url": "https://yourblog.com/post-1",
"summary": "Just published a guide on building cross-posting bots with Python!",
"tags": ["python", "automation", "socialmedia"]
})
# Or auto-post from RSS every hour
# NOTE(review): this only registers the job — nothing shown here runs it.
# A loop calling schedule.run_pending() (with a short time.sleep between
# iterations) is required for the hourly job to actually fire.
schedule.every(1).hours.do(bot.auto_post_from_rss, "https://yourblog.com/feed")
Optimal Posting Times (Static Defaults)
def scrape_best_times():
    """Return generally effective posting windows (HH:MM strings) per platform.

    These are static, research-backed defaults rather than live-scraped data.
    """
    windows = dict(
        twitter=["9:00", "12:00", "17:00"],
        linkedin=["7:30", "12:00", "17:30"],
        bluesky=["10:00", "14:00", "19:00"],
        mastodon=["8:00", "13:00", "18:00"],
    )
    return windows
Proxy Services for Scraping
When scraping social media for content research:
- ScraperAPI — handles JS rendering for Twitter and LinkedIn
- ThorData — residential proxies for social media access
- ScrapeOps — monitor your scraping pipeline
Conclusion
A cross-poster bot saves hours of manual work each week. Start with two platforms, perfect the formatting, then expand. Always respect platform rate limits and avoid spammy behavior.
Follow for more Python automation projects!
Top comments (0)