<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>DEV Community: Yogesh Bansal</title>
    <description>The latest articles on DEV Community by Yogesh Bansal (@yogeba).</description>
    <link>https://dev.to/yogeba</link>
    <image>
      <url>https://media2.dev.to/dynamic/image/width=90,height=90,fit=cover,gravity=auto,format=auto/https:%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F2166216%2Fe96c3b97-6033-4d30-b6ff-d8175209636e.jpg</url>
      <title>DEV Community: Yogesh Bansal</title>
      <link>https://dev.to/yogeba</link>
    </image>
    <atom:link rel="self" type="application/rss+xml" href="https://dev.to/feed/yogeba"/>
    <language>en</language>
    <item>
      <title>Fetch YouTube video transcripts of travel creators</title>
      <dc:creator>Yogesh Bansal</dc:creator>
      <pubDate>Fri, 04 Oct 2024 10:08:54 +0000</pubDate>
      <link>https://dev.to/yogeba/fetch-youtube-video-transcripts-5c21</link>
      <guid>https://dev.to/yogeba/fetch-youtube-video-transcripts-5c21</guid>
      <description>&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;import requests
import csv
import time
import pandas as pd

# YouTube Data API key; substitute your own key before running the script.
API_KEY = 'YOUR_API_KEY'

# Travel-themed search phrases used to discover channels.
SEARCH_KEYWORDS = [
    'travel vlog', 'travel guide', 'food travel',
    'adventure travel', 'backpacking', 'cultural travel experiences',
    'travel tips', 'travel destinations', 'travel blogger',
    'world travel', 'wanderlust', 'exploring the world',
    'travel channel', 'travel documentary', 'road trips',
]

# Root endpoint that every YouTube Data API request below is built on.
BASE_URL = 'https://www.googleapis.com/youtube/v3'

def collect_travel_channels(api_key, search_keywords, timeout=10):
    """Collects travel-related YouTube channels based on search keywords.

    Runs one channel search (up to 50 results) per keyword, de-duplicates
    the hits by channel ID and writes them to 'travel_channels.csv'.

    Args:
        api_key: YouTube Data API key sent with every request.
        search_keywords: Iterable of search phrases to query.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        None. The result is the CSV file written to the working directory.
    """
    columns = ['channelId', 'channelTitle', 'description']
    channels = []
    channel_ids_set = set()

    for keyword in search_keywords:
        print(f"Searching for keyword: {keyword}")
        params = {
            'part': 'snippet',
            'q': keyword,
            'type': 'channel',
            'maxResults': 50,
            'key': api_key
        }
        try:
            # Without a timeout a stalled connection would block the run forever.
            response = requests.get(f"{BASE_URL}/search", params=params, timeout=timeout)
            result = response.json()
        except (requests.RequestException, ValueError) as exc:
            # Treat a transport failure or non-JSON body like an API error
            # payload so one bad keyword does not abort the remaining ones.
            result = {'error': str(exc)}

        if 'items' in result:
            for item in result['items']:
                channel_id = item['snippet']['channelId']
                if channel_id not in channel_ids_set:
                    channel_ids_set.add(channel_id)
                    channels.append({
                        'channelId': channel_id,
                        'channelTitle': item['snippet']['channelTitle'],
                        'description': item['snippet']['description']
                    })
        else:
            print(f"Error in response: {result}")

        # Pause to respect API rate limits
        time.sleep(1)

    # Save channels to CSV. Naming the columns explicitly keeps the header row
    # in the file even when no channel was found, so it stays readable later.
    channels_df = pd.DataFrame(channels, columns=columns)
    channels_df.to_csv('travel_channels.csv', index=False, encoding='utf-8')
    print("Finished collecting channel data.")

def collect_channel_videos(api_key):
    """Retrieves video lists for each channel.

    Reads 'travel_channels.csv', looks up the uploads playlist of every
    channel listed there, gathers the videos of that playlist and writes one
    row per video to 'channel_videos.csv'.

    Args:
        api_key: YouTube Data API key passed on to the lookup helpers.

    Returns:
        None. The result is the CSV file written to the working directory.
    """
    columns = ['channelId', 'channelTitle', 'videoId', 'videoTitle', 'publishedAt']
    try:
        channels_df = pd.read_csv('travel_channels.csv', encoding='utf-8')
    except pd.errors.EmptyDataError:
        # An empty channel file means there is nothing to process; carry on
        # with zero channels instead of crashing on the parse error.
        channels_df = pd.DataFrame(columns=['channelId', 'channelTitle'])
    videos = []

    for index, row in channels_df.iterrows():
        channel_id = row['channelId']
        channel_title = row['channelTitle']
        print(f"Processing channel: {channel_title} (ID: {channel_id})")

        # Get uploads playlist ID
        uploads_playlist_id = get_uploads_playlist_id(api_key, channel_id)
        if uploads_playlist_id:
            # Get videos from playlist
            channel_videos = get_videos_from_playlist(api_key, uploads_playlist_id)
            for video in channel_videos:
                videos.append({
                    'channelId': channel_id,
                    'channelTitle': channel_title,
                    'videoId': video['videoId'],
                    'videoTitle': video['videoTitle'],
                    'publishedAt': video['publishedAt']
                })
        else:
            print(f"Skipping channel {channel_title} due to missing uploads playlist.")

        # Pause between channels
        time.sleep(1)

    # Save videos to CSV. Naming the columns explicitly keeps the header row
    # in the file even when no video was collected.
    videos_df = pd.DataFrame(videos, columns=columns)
    videos_df.to_csv('channel_videos.csv', index=False, encoding='utf-8')
    print("Finished collecting video data.")

def get_uploads_playlist_id(api_key, channel_id, timeout=10):
    """Retrieves the uploads playlist ID for a given channel.

    Args:
        api_key: YouTube Data API key sent with the request.
        channel_id: ID of the channel to look up.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The uploads playlist ID, or None when the request fails or the
        response does not contain one.
    """
    params = {
        'part': 'contentDetails',
        'id': channel_id,
        'key': api_key
    }
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(f"{BASE_URL}/channels", params=params, timeout=timeout)
        result = response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"Request failed for channel ID {channel_id}: {exc}")
        return None

    items = result.get('items') or []
    if items:
        # Walk the nested payload defensively: a channel entry lacking any of
        # these levels is reported as "no playlist" rather than a KeyError.
        related = items[0].get('contentDetails', {}).get('relatedPlaylists', {})
        uploads_playlist_id = related.get('uploads')
        if uploads_playlist_id:
            return uploads_playlist_id

    print(f"Could not get uploads playlist for channel ID: {channel_id}")
    return None

def get_videos_from_playlist(api_key, playlist_id, timeout=10):
    """Retrieves all videos from a playlist.

    Follows 'nextPageToken' until the last page, requesting 50 items per page.

    Args:
        api_key: YouTube Data API key sent with every request.
        playlist_id: ID of the playlist to enumerate.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A list of dicts with the keys 'videoId', 'videoTitle' and
        'publishedAt'. 'publishedAt' is None when the API omits the
        publication date. If a page fails, the videos gathered so far are
        returned.
    """
    videos = []
    params = {
        'part': 'snippet,contentDetails',
        'playlistId': playlist_id,
        'maxResults': 50,
        'key': api_key
    }

    while True:
        try:
            # Without a timeout a stalled connection would block the caller forever.
            response = requests.get(f"{BASE_URL}/playlistItems", params=params, timeout=timeout)
            result = response.json()
        except (requests.RequestException, ValueError) as exc:
            # Route transport failures through the same error branch as an
            # API error payload so already-fetched pages are still returned.
            result = {'error': str(exc)}

        if 'items' not in result:
            print(f"Error retrieving videos: {result}")
            break

        for item in result['items']:
            details = item['contentDetails']
            videos.append({
                'videoId': details['videoId'],
                'videoTitle': item['snippet']['title'],
                # Not guaranteed to be present (e.g. for private or deleted
                # videos); a missing date must not abort the whole channel.
                'publishedAt': details.get('videoPublishedAt')
            })

        next_token = result.get('nextPageToken')
        if not next_token:
            break
        params['pageToken'] = next_token
        time.sleep(0.5)

    return videos

def main():
    """Run the whole pipeline: discover channels, then list their videos."""
    # The first stage writes the channel CSV that the second stage reads back.
    key = API_KEY
    collect_travel_channels(key, SEARCH_KEYWORDS)
    collect_channel_videos(key)
    print("Data collection complete.")

# Start the pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



</description>
      <category>travel</category>
      <category>webdev</category>
    </item>
  </channel>
</rss>
