Iron WebScraper provides a powerful framework to extract data and files from websites using C# code.
- Install IronWebScraper to your project using NuGet
- Create a class extending `WebScraper`
- Create an `Init` method that uses the `Request` method to parse at least one URL.
- Create a `Parse` method to process the requests, and indeed `Request` more pages. Use `response.Css` to work with HTML elements using jQuery-style CSS selectors.
- In your application, create an instance of your web scraping class and call the `Start();` method.
- Read our C# web scraping tutorials to learn how to create advanced web crawlers using IronWebScraper.
C#:
using IronWebScraper;
namespace WebScrapingProject
{
/// <summary>
/// Program entry point: builds a <see cref="BlogScraper"/> and starts it.
/// </summary>
class MainClass
{
    /// <summary>
    /// Creates the scraper and calls its <c>Start</c> method.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    public static void Main(string[] args) => new BlogScraper().Start();
}
/// <summary>
/// Scraper that collects post titles from a blog listing and follows the
/// "previous posts" link whenever the page contains one.
/// </summary>
class BlogScraper : WebScraper
{
    /// <summary>
    /// Enables full logging and issues the first request, with
    /// <see cref="Parse"/> as the callback for its response.
    /// </summary>
    public override void Init()
    {
        this.LoggingLevel = WebScraper.LogLevel.All;
        this.Request("https://blog.scrapinghub.com", Parse);
    }

    /// <summary>
    /// Passes one "Title" record to <c>Scrape</c> for every element matching
    /// <c>h2.entry-title a</c>, then requests the page linked from
    /// <c>div.prev-post &gt; a[href]</c> when such a link exists.
    /// </summary>
    /// <param name="response">The response for a previously requested page.</param>
    public override void Parse(Response response)
    {
        foreach (var anchor in response.Css("h2.entry-title a"))
        {
            string title = anchor.TextContentClean;
            var record = new ScrapedData() { { "Title", title } };
            Scrape(record);
        }

        // No link to an older page: nothing further to request from here.
        if (!response.CssExists("div.prev-post > a[href]"))
        {
            return;
        }

        var olderPageLinks = response.Css("div.prev-post > a[href]");
        var olderPageUrl = olderPageLinks[0].Attributes["href"];
        this.Request(olderPageUrl, Parse);
    }
}
}
VB:
Imports IronWebScraper
Namespace WebScrapingProject
''' <summary>
''' Program entry point: builds a BlogScraper and starts it.
''' </summary>
Friend Class MainClass
    ''' <summary>
    ''' Creates the scraper and calls its Start method.
    ''' </summary>
    ''' <param name="args">Command-line arguments; not used.</param>
    Public Shared Sub Main(ByVal args() As String)
        Dim crawler = New BlogScraper()
        crawler.Start()
    End Sub
End Class
''' <summary>
''' Scraper that collects post titles from a blog listing and follows the
''' "previous posts" link whenever the page contains one.
''' </summary>
Friend Class BlogScraper
    Inherits WebScraper

    ''' <summary>
    ''' Enables full logging and issues the first request, with Parse as the
    ''' callback for its response.
    ''' </summary>
    Public Overrides Sub Init()
        Me.LoggingLevel = WebScraper.LogLevel.All
        Me.Request("https://blog.scrapinghub.com", AddressOf Parse)
    End Sub

    ''' <summary>
    ''' Passes one "Title" record to Scrape for every element matching
    ''' "h2.entry-title a", then requests the page linked from
    ''' "div.prev-post &gt; a[href]" when such a link exists.
    ''' </summary>
    ''' <param name="response">The response for a previously requested page.</param>
    Public Overrides Sub Parse(ByVal response As Response)
        For Each anchor In response.Css("h2.entry-title a")
            Dim title As String = anchor.TextContentClean
            Dim record = New ScrapedData() From {
                {"Title", title}
            }
            Scrape(record)
        Next

        ' No link to an older page: nothing further to request from here.
        If Not response.CssExists("div.prev-post > a[href]") Then
            Return
        End If

        Dim olderPageLinks = response.Css("div.prev-post > a[href]")
        Dim olderPageUrl = olderPageLinks(0).Attributes("href")
        Me.Request(olderPageUrl, AddressOf Parse)
    End Sub
End Class
End Namespace
Top comments (0)