DEV Community

drake
drake

Posted on

X 开发者账号官方API list页的接口

代码

import json
import time
from datetime import datetime
from zoneinfo import ZoneInfo
from scrapy import Spider, Request
from urllib.parse import urlencode
from config import config
from .config import list_ids
from utils.spider_failed_alert import ErrorMonitor

"""
采集所有list动态,但是不包括评论, 官方接口
"""

class TwitterListOfficial(Spider):
    """Collect the tweets of every configured X (Twitter) list via the official v2 API.

    One request is issued per list ID found in ``list_ids``. Replies/comments
    are not collected. Every tweet is yielded as a flat dict with the author's
    profile embedded under ``user_info``.
    """

    author = 'drake.shi'
    name = 'twitter_list_official'
    MONGO_COL = 'twitter_list'
    change_ua = False
    # Do not route requests through a proxy.
    proxy = False
    # Scheduled every 20 minutes (in theory a higher frequency yields more data).
    # NOTE(review): the unit of this value is defined by the scheduler, which
    # is not visible in this file -- confirm before changing it.
    schedule_time = 12 * 20
    api = "https://api.twitter.com/2/lists/{}/tweets"
    custom_settings = {
        'DNSCACHE_ENABLED': False,
        'REACTOR_THREADPOOL_MAXSIZE': 1,
        'DOWNLOAD_DELAY': 5
    }

    def start_requests(self):
        """Yield one list-tweets request for every ID in ``list_ids``.

        Yields:
            Request: a GET request to the list-tweets endpoint carrying the
            bearer token and the full set of field/expansion parameters.
        """
        # Headers and query string do not depend on the list ID, so they are
        # built once instead of once per iteration.
        headers = {
            "Authorization": f"Bearer {config.X_BEARER_TOKEN}",
            # Key setting: do not reuse keep-alive connections.
            "Connection": "close",
        }
        query = urlencode({
            "max_results": 100,
            "tweet.fields": "attachments,author_id,context_annotations,conversation_id,created_at,entities,geo,id,in_reply_to_user_id,lang,public_metrics,possibly_sensitive,referenced_tweets,reply_settings,source,text,withheld",
            "expansions": "author_id,attachments.media_keys,attachments.poll_ids,geo.place_id,in_reply_to_user_id,referenced_tweets.id,referenced_tweets.id.author_id",
            "user.fields": "created_at,description,entities,id,location,name,pinned_tweet_id,profile_image_url,protected,public_metrics,url,username,verified,withheld",
            "media.fields": "duration_ms,height,media_key,preview_image_url,type,url,width,public_metrics,alt_text",
            "poll.fields": "duration_minutes,end_datetime,id,options,voting_status",
            "place.fields": "contained_within,country,country_code,full_name,geo,id,name,place_type",
        })

        for list_id in list_ids:
            yield Request(
                url=f"{self.api.format(list_id)}?{query}",
                headers=headers,
            )

    @ErrorMonitor(name, author)
    def parse(self, response, **kwargs):
        """Turn one list-tweets response into storage items.

        Each request returns roughly 100 tweets; only that single page is
        processed here.

        Args:
            response: the response whose ``text`` is the JSON payload.
            **kwargs: accepted for signature compatibility; unused.

        Yields:
            dict: one item per tweet, keyed by the tweet ID in ``_id``.

        Raises:
            KeyError: if the payload has no ``data`` but does report
                ``errors``, so the failure is still surfaced.
        """
        payload = json.loads(response.text)

        tweets = payload.get("data")
        if tweets is None:
            errors = payload.get("errors")
            if errors:
                # Keep failing loudly (same exception type as the plain
                # subscript would raise) but say what the API reported.
                raise KeyError(
                    f"'data' missing from X API response; errors: {errors}"
                )
            # No "data" and no "errors": nothing to emit for this list.
            return

        # Author profiles arrive separately under includes.users; index by ID.
        users_by_id = {
            u["id"]: u for u in payload.get("includes", {}).get("users", [])
        }

        for tweet in tweets:
            author_id = tweet.get("author_id")
            metrics = tweet['public_metrics']
            yield {
                '_id': tweet['id'],
                # Tweet text
                'content': tweet['text'],
                # Creation time, converted to UTC+8
                'created_at': self._to_shanghai_time(tweet['created_at']),
                # Comments (not collected by this spider)
                'comments': [],
                # Like count
                'favorite_count': metrics['like_count'],
                # Reply count
                'reply_count': metrics['reply_count'],
                # Retweet count
                'retweet_count': metrics['retweet_count'],
                'user_info': self._build_user_info(
                    users_by_id.get(author_id, {}), author_id
                ),
            }

    @staticmethod
    def _to_shanghai_time(created_at):
        """Convert an ISO-8601 UTC timestamp to ``YYYY-mm-dd HH:MM:SS`` in Asia/Shanghai."""
        # Rewrite the trailing "Z" as an explicit offset before parsing.
        utc_dt = datetime.fromisoformat(created_at.replace("Z", "+00:00"))
        local_dt = utc_dt.astimezone(ZoneInfo("Asia/Shanghai"))
        return local_dt.strftime("%Y-%m-%d %H:%M:%S")

    @staticmethod
    def _build_user_info(user, author_id):
        """Build the ``user_info`` sub-document for one tweet author.

        ``user`` may be an empty dict when the author is absent from the
        response's ``includes.users``; unknown fields are then ``None`` and
        the IDs fall back to ``author_id`` instead of raising ``KeyError``.
        """
        metrics = user.get('public_metrics', {})
        # Example: https://pbs.twimg.com/profile_images/1675587952015974400/jvaLP8ty_normal.jpg
        # The API returns the downscaled avatar; dropping "_normal" gives the
        # original (large) image.
        icon = user.get('profile_image_url')
        if icon:
            icon = icon.replace('_normal', '')
        user_id = user.get('id', author_id)
        return {
            # Display name
            'name': user.get('name'),
            # Handle
            'user_name': user.get('username'),
            # User ID
            'user_id': user_id,
            'rest_id': user_id,
            # Wallet addresses (always emitted empty here)
            'wallet_address': [],
            # Follower count
            'followers': metrics.get('followers_count'),
            # Following count
            'following': metrics.get('following_count'),
            # Like count
            'favourites_count': metrics.get('like_count'),
            # Media count
            'media_count': metrics.get('media_count'),
            'listed_count': metrics.get('listed_count'),
            # Post count
            'statuses_count': metrics.get('tweet_count'),
            # Avatar
            'icon': icon,
        }

Enter fullscreen mode Exit fullscreen mode

响应体解析后的单个Item样例

{
    "author_id": "1482629674035466240",
    "text": "⚡️#TAO/USDT⚡️\n\n🟢LONG/BUY: 311.80 - 305.00\n\n🏹Targets:  317.00  -  325.00 - 335.00 - 350.00 - 380.00+🚀\n\n❌Stop Loss 298.00\n\n‼️Leverage: 20X 10X (Use Leverage according to your risk management)\n\n👉Use only upto 5% of Total Funds\n#qatar #UAE #Dubai #Kuwait\nhttps://t.co/132yLwxbph",
    "possibly_sensitive": false,
    "entities": {
        "hashtags": [
            {
                "start": 2,
                "end": 6,
                "tag": "TAO"
            },
            {
                "start": 224,
                "end": 230,
                "tag": "qatar"
            },
            {
                "start": 231,
                "end": 235,
                "tag": "UAE"
            },
            {
                "start": 236,
                "end": 242,
                "tag": "Dubai"
            },
            {
                "start": 243,
                "end": 250,
                "tag": "Kuwait"
            }
        ],
        "urls": [
            {
                "start": 251,
                "end": 274,
                "url": "https://t.co/132yLwxbph",
                "expanded_url": "https://t.me/+phaeKOjznO42ZmE8",
                "display_url": "t.me/+phaeKOjznO42Z…",
                "images": [
                    {
                        "url": "https://pbs.twimg.com/news_img/1912650169780150274/hEh0KHn2?format=png&name=orig",
                        "width": 256,
                        "height": 256
                    },
                    {
                        "url": "https://pbs.twimg.com/news_img/1912650169780150274/hEh0KHn2?format=png&name=150x150",
                        "width": 150,
                        "height": 150
                    }
                ],
                "status": 200,
                "title": "Join group chat on Telegram",
                "description": "You are invited to a group chat on Telegram. Click to",
                "unwound_url": "https://t.me/+phaeKOjznO42ZmE8"
            }
        ],
        "annotations": [
            {
                "start": 232,
                "end": 234,
                "probability": 0.905,
                "type": "Place",
                "normalized_text": "UAE"
            },
            {
                "start": 237,
                "end": 241,
                "probability": 0.9325,
                "type": "Place",
                "normalized_text": "Dubai"
            },
            {
                "start": 244,
                "end": 249,
                "probability": 0.8952,
                "type": "Place",
                "normalized_text": "Kuwait"
            }
        ]
    },
    "reply_settings": "everyone",
    "created_at": "2025-04-23T02:31:02.000Z",
    "edit_history_tweet_ids": [
        "1914869594365231388"
    ],
    "lang": "en",
    "public_metrics": {
        "retweet_count": 0,
        "reply_count": 0,
        "like_count": 0,
        "quote_count": 0,
        "bookmark_count": 0,
        "impression_count": 26
    },
    "id": "1914869594365231388",
    "conversation_id": "1914869594365231388",
    "context_annotations": [
        {
            "domain": {
                "id": "46",
                "name": "Business Taxonomy",
                "description": "Categories within Brand Verticals that narrow down the scope of Brands"
            },
            "entity": {
                "id": "1557696848252391426",
                "name": "Financial Services Business",
                "description": "Brands, companies, advertisers and every non-person handle with the profit intent related to Banks, Credit cards, Insurance, Investments, Stocks "
            }
        },
        {
            "domain": {
                "id": "30",
                "name": "Entities [Entity Service]",
                "description": "Entity Service top level domain, every item that is in Entity Service should be in this domain"
            },
            "entity": {
                "id": "1139229087682068480",
                "name": "Tether cryptocurrency"
            }
        },
        {
            "domain": {
                "id": "131",
                "name": "Unified Twitter Taxonomy",
                "description": "A taxonomy of user interests. "
            },
            "entity": {
                "id": "913142676819648512",
                "name": "Cryptocurrencies",
                "description": "Cryptocurrency"
            }
        },
        {
            "domain": {
                "id": "131",
                "name": "Unified Twitter Taxonomy",
                "description": "A taxonomy of user interests. "
            },
            "entity": {
                "id": "1139229087682068480",
                "name": "Tether cryptocurrency"
            }
        },
        {
            "domain": {
                "id": "131",
                "name": "Unified Twitter Taxonomy",
                "description": "A taxonomy of user interests. "
            },
            "entity": {
                "id": "1484181943616884743",
                "name": "Cryptocoins"
            }
        },
        {
            "domain": {
                "id": "131",
                "name": "Unified Twitter Taxonomy",
                "description": "A taxonomy of user interests. "
            },
            "entity": {
                "id": "1492162686204854274",
                "name": "Digital assets & cryptocurrency",
                "description": "Cryptocurrency"
            }
        }
    ]

}
Enter fullscreen mode Exit fullscreen mode

Top comments (0)