DEV Community

agenthustler
agenthustler

Posted on

Building a Social Media Cross-Poster Bot with Web Scraping

Building a Social Media Cross-Poster Bot with Web Scraping

Posting the same content to Twitter/X, LinkedIn, Bluesky, and Mastodon by hand is tedious. Let's build an automated cross-poster that adapts the content to each platform's limits and conventions.

Architecture

  1. Content source: Your blog RSS feed, a Google Sheet, or manual input
  2. Adapter layer: Reformats content per platform (character limits, hashtags, mentions)
  3. Poster layer: API clients for each platform
  4. Scheduler: Optimal posting times per platform

Setup

pip install requests beautifulsoup4 feedparser tweepy atproto schedule
Enter fullscreen mode Exit fullscreen mode

Content Source: RSS Feed Scraper

import feedparser
import requests
from bs4 import BeautifulSoup
from datetime import datetime

class ContentSource:
    """Produces post candidates, either scraped from an RSS feed or
    supplied by hand.

    Every post is a dict with the keys: title, url, summary, published, tags.
    """

    def __init__(self):
        # Reusable HTTP session for any follow-up requests.
        self.session = requests.Session()

    def from_rss(self, feed_url, limit=5):
        """Return up to *limit* posts parsed from the feed at *feed_url*."""
        parsed = feedparser.parse(feed_url)

        def entry_to_post(entry):
            raw_html = entry.get("summary", entry.get("description", ""))
            # Strip markup from the feed body and cap it at 500 characters.
            plain = BeautifulSoup(raw_html, "html.parser").get_text()[:500]
            return {
                "title": entry.title,
                "url": entry.link,
                "summary": plain,
                "published": entry.get("published", datetime.now().isoformat()),
                "tags": [tag.term for tag in entry.get("tags", [])],
            }

        return [entry_to_post(e) for e in parsed.entries[:limit]]

    def from_manual(self, text, url=None, tags=None):
        """Wrap hand-written *text* in the same post-dict shape as from_rss."""
        post = {
            "title": text[:100],
            "url": url or "",
            "summary": text,
            "published": datetime.now().isoformat(),
            "tags": tags or [],
        }
        return [post]
Enter fullscreen mode Exit fullscreen mode

Platform Adapters

class ContentAdapter:
    """Reformats a post dict (title, url, summary, tags) to fit each
    platform's character limit and hashtag conventions."""

    # Hard character limits per platform.
    LIMITS = {
        "twitter": 280,
        "bluesky": 300,
        "linkedin": 3000,
        "mastodon": 500
    }

    def adapt(self, post, platform):
        """Return *post* formatted for *platform*.

        Raises:
            ValueError: for an unknown platform. (Previously this silently
                returned None, which crashed later inside the poster.)
        """
        limit = self.LIMITS.get(platform, 280)
        formatters = {
            "twitter": self.format_twitter,
            "bluesky": self.format_bluesky,
            "linkedin": self.format_linkedin,
            "mastodon": self.format_mastodon,
        }
        try:
            formatter = formatters[platform]
        except KeyError:
            raise ValueError(f"Unsupported platform: {platform}") from None
        return formatter(post, limit)

    @staticmethod
    def _hashtag_line(tags, max_tags):
        """Join up to *max_tags* tags as '#tag' tokens; '' when no tags."""
        return " ".join(f"#{t}" for t in tags[:max_tags])

    def format_twitter(self, post, limit):
        hashtags = self._hashtag_line(post["tags"], 3)
        url = post["url"]
        # Reserve room for the URL, hashtags and separators. Clamp at zero:
        # a negative value would become a negative slice and mangle the text.
        available = max(limit - len(url) - len(hashtags) - 4, 0)
        text = post["summary"][:available]
        if hashtags:
            return f"{text}\n\n{url}\n{hashtags}"
        # No tags: omit the hashtag line instead of leaving a trailing newline.
        return f"{text}\n\n{url}"

    def format_bluesky(self, post, limit):
        available = max(limit - len(post["url"]) - 2, 0)
        text = post["summary"][:available]
        return f"{text}\n{post['url']}"

    def format_linkedin(self, post, limit):
        hashtags = self._hashtag_line(post["tags"], 5)
        # The 3000-char limit was previously never enforced; truncate the
        # summary so the whole message (plus 6 separator chars) fits.
        overhead = len(post["title"]) + len(post["url"]) + len(hashtags) + 6
        summary = post["summary"][:max(limit - overhead, 0)]
        return f"{post['title']}\n\n{summary}\n\n{post['url']}\n\n{hashtags}"

    def format_mastodon(self, post, limit):
        hashtags = self._hashtag_line(post["tags"], 4)
        available = max(limit - len(post["url"]) - len(hashtags) - 4, 0)
        text = post["summary"][:available]
        if hashtags:
            return f"{text}\n\n{post['url']}\n{hashtags}"
        return f"{text}\n\n{post['url']}"
Enter fullscreen mode Exit fullscreen mode

Platform Posters

class TwitterPoster:
    """Posts tweets through the Twitter/X v2 API via tweepy."""

    def __init__(self, api_key, api_secret, access_token, access_secret):
        # Imported lazily so the dependency is only required when this
        # platform is actually enabled.
        import tweepy
        # tweepy.Client performs OAuth 1.0a signing itself; the
        # OAuthHandler/set_access_token pair the original built here was
        # dead code — it was never passed to anything.
        self.client = tweepy.Client(
            consumer_key=api_key,
            consumer_secret=api_secret,
            access_token=access_token,
            access_token_secret=access_secret
        )

    def post(self, text):
        """Publish *text* and return the canonical tweet URL."""
        result = self.client.create_tweet(text=text)
        tweet_id = result.data["id"]
        return f"https://twitter.com/i/status/{tweet_id}"


class BlueskyPoster:
    """Posts to Bluesky through the AT Protocol client."""

    def __init__(self, handle, password):
        # Imported lazily so the dependency is only required when enabled.
        from atproto import Client
        self.client = Client()
        self.client.login(handle, password)

    def post(self, text):
        """Publish *text* and return the public bsky.app URL of the post.

        The post's rkey is the last path segment of the AT URI returned
        by send_post.
        """
        response = self.client.send_post(text=text)
        # BUG FIX: the original read `response.uri.split(/)`, which is a
        # SyntaxError — the separator must be the string "/".
        rkey = response.uri.split("/")[-1]
        return f"https://bsky.app/profile/{self.client.me.handle}/post/{rkey}"


class LinkedInPoster:
    """Posts text shares through the LinkedIn UGC REST API."""

    def __init__(self, access_token):
        self.token = access_token
        self.headers = {
            "Authorization": f"Bearer {self.token}",
            "Content-Type": "application/json"
        }

    def post(self, text):
        """Publish *text* as a public share and return the post URL.

        Raises:
            requests.HTTPError: if either API call fails (the original
                crashed with an opaque KeyError on expired tokens).
        """
        # Resolve the member URN the share must be authored as.
        me_response = requests.get(
            "https://api.linkedin.com/v2/userinfo",
            headers=self.headers,
            timeout=30
        )
        me_response.raise_for_status()
        me = me_response.json()

        payload = {
            "author": f"urn:li:person:{me['sub']}",
            "lifecycleState": "PUBLISHED",
            "specificContent": {
                "com.linkedin.ugc.ShareContent": {
                    "shareCommentary": {"text": text},
                    "shareMediaCategory": "NONE"
                }
            },
            "visibility": {
                "com.linkedin.ugc.MemberNetworkVisibility": "PUBLIC"
            }
        }

        response = requests.post(
            "https://api.linkedin.com/v2/ugcPosts",
            headers=self.headers,
            json=payload,
            timeout=30
        )
        response.raise_for_status()
        # Return a URL string like the other posters do (the original
        # returned the raw JSON body, inconsistent with Twitter/Mastodon).
        post_id = response.json().get("id", "")
        return f"https://www.linkedin.com/feed/update/{post_id}"


class MastodonPoster:
    """Posts statuses to a Mastodon instance via its REST API."""

    def __init__(self, instance_url, access_token):
        # instance_url example: "https://mastodon.social" (no trailing slash).
        self.base_url = instance_url
        self.headers = {"Authorization": f"Bearer {access_token}"}

    def post(self, text):
        """Publish *text* as a status and return its public URL.

        Raises:
            requests.HTTPError: if the API call fails (the original
                silently returned "" on any error).
        """
        response = requests.post(
            f"{self.base_url}/api/v1/statuses",
            headers=self.headers,
            json={"status": text},
            timeout=30
        )
        response.raise_for_status()
        return response.json().get("url", "")
Enter fullscreen mode Exit fullscreen mode

The Cross-Poster Bot

import time
import schedule

class CrossPoster:
    """Fans a post out to every registered platform, deduplicating by URL."""

    def __init__(self):
        self.source = ContentSource()
        self.adapter = ContentAdapter()
        self.platforms = {}   # name -> poster object exposing .post(text)
        self.posted = set()   # ids already published (in-memory only; lost on restart)
        self.post_delay = 5   # seconds between platforms, to avoid rate limits

    def add_platform(self, name, poster):
        """Register *poster* (any object with a .post(text) method) as *name*."""
        self.platforms[name] = poster

    def cross_post(self, post):
        """Adapt and publish *post* to every registered platform.

        Returns:
            dict mapping platform name to {"status": ..., "url"/"error": ...},
            or None when the post was already published.

        The post is marked as published only when at least one platform
        succeeded, so a post that failed everywhere can be retried (the
        original marked it unconditionally and it was lost forever).
        """
        post_id = post["url"] or post["title"]
        if post_id in self.posted:
            return None

        results = {}
        for platform_name, poster in self.platforms.items():
            try:
                content = self.adapter.adapt(post, platform_name)
                url = poster.post(content)
                results[platform_name] = {"status": "success", "url": url}
                print(f"Posted to {platform_name}: {url}")
                time.sleep(self.post_delay)  # stagger posts
            except Exception as e:
                results[platform_name] = {"status": "error", "error": str(e)}
                print(f"Failed on {platform_name}: {e}")

        if any(r["status"] == "success" for r in results.values()):
            self.posted.add(post_id)
        return results

    def auto_post_from_rss(self, feed_url):
        """Cross-post the newest entry of *feed_url* (deduplicated by URL)."""
        for post in self.source.from_rss(feed_url, limit=1):
            self.cross_post(post)

# Setup — in real use, load credentials from environment variables or a
# secrets manager; never hard-code them like this demo does.
bot = CrossPoster()
bot.add_platform("bluesky", BlueskyPoster("you.bsky.social", "password"))
bot.add_platform("mastodon", MastodonPoster("https://mastodon.social", "token"))

# Post manually
bot.cross_post({
    "title": "New blog post",
    "url": "https://yourblog.com/post-1",
    "summary": "Just published a guide on building cross-posting bots with Python!",
    "tags": ["python", "automation", "socialmedia"]
})

# Or auto-post from RSS every hour
schedule.every(1).hours.do(bot.auto_post_from_rss, "https://yourblog.com/feed")

# BUG FIX: the original registered the job but never ran it — the schedule
# library only fires jobs from an explicit polling loop. Guarded by
# __main__ so importing this module doesn't block forever.
if __name__ == "__main__":
    while True:
        schedule.run_pending()
        time.sleep(60)
Enter fullscreen mode Exit fullscreen mode

Scraping Optimal Posting Times

def scrape_best_times():
    """Return commonly recommended posting windows (HH:MM, local time)
    for each supported platform, based on general engagement research."""
    windows = [
        ("twitter", ["9:00", "12:00", "17:00"]),
        ("linkedin", ["7:30", "12:00", "17:30"]),
        ("bluesky", ["10:00", "14:00", "19:00"]),
        ("mastodon", ["8:00", "13:00", "18:00"]),
    ]
    return dict(windows)
Enter fullscreen mode Exit fullscreen mode

Proxy Services for Scraping

When scraping social media for content research:

  • ScraperAPI — handles JS rendering for Twitter and LinkedIn
  • ThorData — residential proxies for social media access
  • ScrapeOps — monitor your scraping pipeline

Conclusion

A cross-poster bot saves hours of manual work each week. Start with two platforms, perfect the formatting, then expand. Always respect platform rate limits and avoid spammy behavior.


Follow for more Python automation projects!

Top comments (0)