import requests
import csv
import time
import pandas as pd
# YouTube Data API key; replace the placeholder with a real key before running.
API_KEY = 'YOUR_API_KEY'

# Root of the YouTube Data API v3; endpoint paths are appended to this.
BASE_URL = 'https://www.googleapis.com/youtube/v3'

# Travel-related search queries; each one is sent as a separate channel search.
SEARCH_KEYWORDS = [
    'travel vlog',
    'travel guide',
    'food travel',
    'adventure travel',
    'backpacking',
    'cultural travel experiences',
    'travel tips',
    'travel destinations',
    'travel blogger',
    'world travel',
    'wanderlust',
    'exploring the world',
    'travel channel',
    'travel documentary',
    'road trips',
]
def collect_travel_channels(api_key, search_keywords):
    """Collect YouTube channels matching the keywords and save them to CSV.

    One request per keyword is sent to the ``/search`` endpoint with
    ``type=channel`` and ``maxResults=50``; only the first page of each
    search is read. Channels are de-duplicated by channel ID across all
    keywords and written to ``travel_channels.csv`` in the current working
    directory.

    Args:
        api_key: YouTube Data API key, sent as the ``key`` query parameter.
        search_keywords: Iterable of search query strings.

    Returns:
        None. The output is the CSV file, which always has the header
        ``channelId,channelTitle,description`` even when nothing was found.
    """
    columns = ['channelId', 'channelTitle', 'description']
    channels = []
    channel_ids_set = set()

    for keyword in search_keywords:
        print(f"Searching for keyword: {keyword}")
        params = {
            'part': 'snippet',
            'q': keyword,
            'type': 'channel',
            'maxResults': 50,
            'key': api_key,
        }
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(
                f"{BASE_URL}/search", params=params, timeout=30
            )
            result = response.json()
        except (requests.RequestException, ValueError) as exc:
            # Network failure or a non-JSON body: report it and move on to
            # the next keyword instead of losing what was already collected.
            print(f"Request failed for keyword {keyword!r}: {exc}")
        else:
            if isinstance(result, dict) and 'items' in result:
                for item in result['items']:
                    snippet = item['snippet']
                    channel_id = snippet['channelId']
                    if channel_id in channel_ids_set:
                        continue
                    channel_ids_set.add(channel_id)
                    channels.append({
                        'channelId': channel_id,
                        'channelTitle': snippet['channelTitle'],
                        'description': snippet['description'],
                    })
            else:
                print(f"Error in response: {result}")

        # Pause to respect API rate limits
        time.sleep(1)

    # Explicit columns make an empty result still produce a header row, so
    # the file stays readable by pd.read_csv in the next stage.
    channels_df = pd.DataFrame(channels, columns=columns)
    channels_df.to_csv('travel_channels.csv', index=False, encoding='utf-8')
    print("Finished collecting channel data.")
def collect_channel_videos(api_key):
    """Fetch the upload list of every saved channel and write it to CSV.

    Reads ``travel_channels.csv`` from the current working directory (the
    columns ``channelId`` and ``channelTitle`` are used), looks up each
    channel's uploads playlist, retrieves the playlist's videos and writes
    one row per video to ``channel_videos.csv``.

    Args:
        api_key: YouTube Data API key passed through to the lookup helpers.

    Returns:
        None. The output is the CSV file, which always has the header
        ``channelId,channelTitle,videoId,videoTitle,publishedAt``.
    """
    video_columns = [
        'channelId', 'channelTitle', 'videoId', 'videoTitle', 'publishedAt',
    ]

    try:
        channels_df = pd.read_csv('travel_channels.csv', encoding='utf-8')
    except pd.errors.EmptyDataError:
        # A channel file without even a header means nothing was collected;
        # treat it as zero channels instead of crashing.
        print("No channels found in travel_channels.csv.")
        channels_df = pd.DataFrame(columns=['channelId', 'channelTitle'])

    videos = []
    channel_pairs = zip(channels_df['channelId'], channels_df['channelTitle'])
    for channel_id, channel_title in channel_pairs:
        print(f"Processing channel: {channel_title} (ID: {channel_id})")

        # Get uploads playlist ID
        uploads_playlist_id = get_uploads_playlist_id(api_key, channel_id)
        if uploads_playlist_id:
            # Get videos from playlist
            channel_videos = get_videos_from_playlist(
                api_key, uploads_playlist_id
            )
            videos.extend(
                {
                    'channelId': channel_id,
                    'channelTitle': channel_title,
                    'videoId': video['videoId'],
                    'videoTitle': video['videoTitle'],
                    'publishedAt': video['publishedAt'],
                }
                for video in channel_videos
            )
        else:
            print(f"Skipping channel {channel_title} due to missing uploads playlist.")

        # Pause between channels
        time.sleep(1)

    # Explicit columns keep the header row even when no videos were found.
    videos_df = pd.DataFrame(videos, columns=video_columns)
    videos_df.to_csv('channel_videos.csv', index=False, encoding='utf-8')
    print("Finished collecting video data.")
def get_uploads_playlist_id(api_key, channel_id):
    """Return the uploads playlist ID of a channel, or None if unavailable.

    Sends one request to the ``/channels`` endpoint with
    ``part=contentDetails`` and reads
    ``items[0].contentDetails.relatedPlaylists.uploads`` from the response.

    Args:
        api_key: YouTube Data API key, sent as the ``key`` query parameter.
        channel_id: ID of the channel to look up.

    Returns:
        The uploads playlist ID, or None when the request fails, the
        response is not valid JSON, or the response carries no uploads
        playlist for the channel. A message is printed in each None case.
    """
    params = {
        'part': 'contentDetails',
        'id': channel_id,
        'key': api_key,
    }
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(
            f"{BASE_URL}/channels", params=params, timeout=30
        )
        result = response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"Could not get uploads playlist for channel ID: {channel_id} ({exc})")
        return None

    items = result.get('items') if isinstance(result, dict) else None
    if items:
        # Walk the nested structure defensively so a partially filled
        # response yields None rather than a KeyError.
        related = items[0].get('contentDetails', {}).get('relatedPlaylists', {})
        uploads_playlist_id = related.get('uploads')
        if uploads_playlist_id:
            return uploads_playlist_id

    print(f"Could not get uploads playlist for channel ID: {channel_id}")
    return None
def get_videos_from_playlist(api_key, playlist_id):
    """Retrieve the videos of a playlist, following pagination.

    Requests the ``/playlistItems`` endpoint 50 items at a time and keeps
    following ``nextPageToken`` until the last page, sleeping 0.5 seconds
    between pages.

    Args:
        api_key: YouTube Data API key, sent as the ``key`` query parameter.
        playlist_id: ID of the playlist to read.

    Returns:
        A list of dicts with the keys ``videoId``, ``videoTitle`` and
        ``publishedAt``. ``videoTitle`` and ``publishedAt`` are None when
        the API response omits them. If a request fails or returns an error
        payload, the videos gathered so far are returned.
    """
    videos = []
    params = {
        'part': 'snippet,contentDetails',
        'playlistId': playlist_id,
        'maxResults': 50,
        'key': api_key,
    }

    while True:
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(
                f"{BASE_URL}/playlistItems", params=params, timeout=30
            )
            result = response.json()
        except (requests.RequestException, ValueError) as exc:
            print(f"Error retrieving videos: {exc}")
            break

        if not isinstance(result, dict) or 'items' not in result:
            print(f"Error retrieving videos: {result}")
            break

        for item in result['items']:
            content_details = item.get('contentDetails', {})
            video_id = content_details.get('videoId')
            if not video_id:
                # Nothing to identify the video by; skip the entry.
                continue
            videos.append({
                'videoId': video_id,
                'videoTitle': item.get('snippet', {}).get('title'),
                # 'videoPublishedAt' can be missing from a playlist item;
                # a direct lookup would raise KeyError and throw away
                # every video collected so far.
                'publishedAt': content_details.get('videoPublishedAt'),
            })

        next_page_token = result.get('nextPageToken')
        if not next_page_token:
            break
        params['pageToken'] = next_page_token
        time.sleep(0.5)

    return videos
def main():
    """Run the two collection stages in order.

    The channel search runs first because the video stage reads the
    ``travel_channels.csv`` file that the channel stage writes.
    """
    collect_travel_channels(API_KEY, SEARCH_KEYWORDS)
    collect_channel_videos(API_KEY)
    print("Data collection complete.")


if __name__ == '__main__':
    main()
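# NOTE: the two lines below are web-page residue from the post this script
# was copied from; they are not part of the program.
# For further actions, you may consider blocking this person and/or reporting abuse
# Top comments (0)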