<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>DEV Community: Chethan Yadav</title>
    <description>The latest articles on DEV Community by Chethan Yadav (@chethanyadav).</description>
    <link>https://dev.to/chethanyadav</link>
    <image>
      <url>https://media2.dev.to/dynamic/image/width=90,height=90,fit=cover,gravity=auto,format=auto/https:%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F1844261%2F7c291da0-77fa-4f62-b22d-52f904f11991.jpg</url>
      <title>DEV Community: Chethan Yadav</title>
      <link>https://dev.to/chethanyadav</link>
    </image>
    <atom:link rel="self" type="application/rss+xml" href="https://dev.to/feed/chethanyadav"/>
    <language>en</language>
    <item>
      <title>The Tech News Scraper</title>
      <dc:creator>Chethan Yadav</dc:creator>
      <pubDate>Sun, 29 Dec 2024 01:36:26 +0000</pubDate>
      <link>https://dev.to/chethanyadav/the-tech-news-scraper-14kc</link>
      <guid>https://dev.to/chethanyadav/the-tech-news-scraper-14kc</guid>
      <description>&lt;p&gt;&lt;em&gt;This is a submission for the &lt;a href="https://dev.to/challenges/brightdata"&gt;Bright Data Web Scraping Challenge&lt;/a&gt;: Scrape Data from Complex, Interactive Websites&lt;/em&gt;&lt;/p&gt;

&lt;h2&gt;
  
  
  What I Built
&lt;/h2&gt;

&lt;p&gt;This project scrapes data from websites that offer the latest technology news and updates. It uses JavaScript and Node.js, with Puppeteer and the Bright Data Scraping Browser to handle dynamic content. It scrapes data from two major websites:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;&lt;a href="https://www.artificialintelligence-news.com/artificial-intelligence-news/" rel="noopener noreferrer"&gt;Artificial Intelligence News&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="https://thehackernews.com/" rel="noopener noreferrer"&gt;The Hacker News&lt;/a&gt;&lt;/li&gt;
&lt;/ol&gt;

&lt;h2&gt;
  
  
  Demo
&lt;/h2&gt;

&lt;p&gt;You can view the source code and instructions for running the project on &lt;a href="https://github.com/chethanyadav456/Scraping_Master" rel="noopener noreferrer"&gt;GitHub&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F9ayn0cfn0p9udxvjff81.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F9ayn0cfn0p9udxvjff81.png" alt="Articles display webpage" width="800" height="482"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;h2&gt;
  
  
  How I Used Bright Data
&lt;/h2&gt;

&lt;p&gt;I leveraged Bright Data’s Scraping Browser to handle JavaScript-heavy and interactive websites that require dynamic content loading. The project scrapes real-time data, including titles, descriptions, URLs, images, and published dates. Bright Data’s browser provided a smooth solution to maintain the scraping process without additional overhead.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Challenge Prompt:&lt;/strong&gt; &lt;a href="https://dev.to/devteam/join-us-for-the-bright-data-web-scraping-challenge-3000-in-prizes-3mg2?"&gt;Bright Data Web Scraping Challenge&lt;/a&gt;&lt;/p&gt;

&lt;h2&gt;
  
  
  Installation
&lt;/h2&gt;

&lt;ol&gt;
&lt;li&gt;Clone the repository
&lt;/li&gt;
&lt;/ol&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;git clone https://github.com/chethanyadav456/Scraping_Master.git
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;ol start="2"&gt;
&lt;li&gt;Install dependencies
&lt;/li&gt;
&lt;/ol&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;npm &lt;span class="nb"&gt;install&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;ol start="3"&gt;
&lt;li&gt;Run the project
&lt;/li&gt;
&lt;/ol&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;node master.js
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;ol start="4"&gt;
&lt;li&gt;Create a .env file and add:
&lt;/li&gt;
&lt;/ol&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;MONGO_URI=
BROWSER_WS=
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  License
&lt;/h2&gt;

&lt;p&gt;This project is licensed under the MIT License - see the LICENSE file for details.&lt;/p&gt;

</description>
      <category>brightdatachallenge</category>
      <category>devchallenge</category>
      <category>webdev</category>
      <category>api</category>
    </item>
  </channel>
</rss>
