What will be scraped
Full Code
from selenium import webdriver
from selenium_stealth import stealth
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selectolax.lexbor import LexborHTMLParser
import time, json
def scroll_page(url):
    """Load ``url`` in headless Chrome, scroll until the page stops growing, and parse it.

    Args:
        url: Address of the Google Hotels results page to open.

    Returns:
        A ``LexborHTMLParser`` built from the page source captured after
        scrolling has finished.
    """
    # The scrollHeight of the '.t8aeve' element is both the scroll target and
    # the stop signal: once it no longer changes, nothing new was loaded.
    height_script = "return document.querySelector('.t8aeve').scrollHeight;"

    service = Service(ChromeDriverManager().install())

    options = webdriver.ChromeOptions()
    options.add_argument('start-maximized')
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_experimental_option('excludeSwitches', ['enable-automation'])
    options.add_experimental_option('useAutomationExtension', False)

    driver = webdriver.Chrome(service=service, options=options)

    # try/finally guarantees the browser is shut down even when a script or
    # element lookup raises; otherwise the headless Chrome process would leak.
    try:
        stealth(
            driver,
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
            languages=['en-US', 'en'],
            vendor='Google Inc.',
            platform='Win32',
            webgl_vendor='Intel Inc.',
            renderer='Intel Iris OpenGL Engine',
            fix_hairline=True,
        )

        driver.get(url)
        time.sleep(2)  # give the page time to render the first results

        old_height = driver.execute_script(height_script)
        body = driver.find_element(By.TAG_NAME, 'body')

        while True:
            # Jump to the bottom, then nudge the page up: the article notes
            # that new results only load after scrolling up a little.
            driver.execute_script("window.scrollTo(0, document.querySelector('.t8aeve').scrollHeight)")
            time.sleep(1)
            body.send_keys(Keys.PAGE_UP)
            time.sleep(2)

            new_height = driver.execute_script(height_script)
            if new_height == old_height:
                break
            old_height = new_height

        return LexborHTMLParser(driver.page_source)
    finally:
        driver.quit()
def _extract_price(text):
    """Return the first number found in ``text`` as a float, or None if there is none.

    Leading characters such as a currency symbol are skipped, commas inside
    the number are treated as thousands separators, and scanning stops at the
    first character after the number, so both '$174' and '107 USD' parse.
    """
    collected = []
    for char in text:
        if char in '0123456789':
            collected.append(char)
        elif not collected:
            continue  # still skipping a prefix such as '$'
        elif char == ',':
            continue  # thousands separator inside the number
        elif char == '.' and '.' not in collected:
            collected.append(char)
        else:
            break  # the number has ended
    number = ''.join(collected).rstrip('.')
    return float(number) if number else None


def scrape_google_hotels(parser):
    """Extract hotel listings from a parsed Google Hotels results page.

    Args:
        parser: Parsed page exposing ``root.css()``, e.g. the
            ``LexborHTMLParser`` returned by ``scroll_page``.

    Returns:
        A list with one dict per ``.uaTTDe`` element. Each dict has the keys
        ``title``, ``link``, ``price``, ``extracted_price``, ``rating``,
        ``reviews``, ``extensions`` and ``thumbnails``; ``ad`` is present only
        when the advertisement label is found. Missing values are ``None``.
    """
    data = []

    for result in parser.root.css('.uaTTDe'):
        result_dict = {}

        ad = result.css_first('.hVE5 .ogfYpf')
        if ad:
            # Explicit escape so the non-breaking space being replaced is
            # visible in the source (presumably what the label contains).
            result_dict['ad'] = ad.text().replace('\xa0', ' ')

        title = result.css_first('.QT7m7 h2')
        result_dict['title'] = title.text() if title else None

        link = result.css_first('.PVOOXe')
        href = link.attributes.get('href') if link else None
        result_dict['link'] = ('https://www.google.com' + href) if href else None

        price = result.css_first('.OxGZuc .kixHKb span')
        price_text = price.text() if price else None
        result_dict['price'] = price_text
        # Prices appear both as '107 USD' and as '$174', so the number cannot
        # be taken from the first whitespace-separated token.
        result_dict['extracted_price'] = _extract_price(price_text) if price_text else None

        rating = result.css_first('.FW82K .KFi5wf')
        result_dict['rating'] = float(rating.text()) if rating else None

        reviews = result.css_first('.FW82K .jdzyld')
        # Text looks like ' (1,602)': drop the leading ' (' and trailing ')'.
        result_dict['reviews'] = int(reviews.text()[2:-1].replace(',', '')) if reviews else None

        extensions = []
        for extension in result.css('.RJM8Kc .HlxIlc div, li'):
            label = extension.css_first('.sSHqwe')
            if label:  # skip matched elements that carry no label node
                extensions.append(label.text())
        result_dict['extensions'] = extensions

        # The image URL is stored in 'src' or, failing that, in 'data-src'.
        result_dict['thumbnails'] = [
            thumbnail.attributes.get('src') or thumbnail.attributes.get('data-src')
            for thumbnail in result.css('.NBZP0e .q5P4L')
        ]

        data.append(result_dict)

    return data
def main():
    """Scrape the sample "hotels in tulsa oklahoma" search and print the result as JSON."""
    search_url = 'https://www.google.com/travel/hotels?q=hotels%20in%20tulsa%20oklahoma&gsas=1&rp=CgpYAGAAcgQIAhgAOAGoAgA&ved=0CAAQ5JsGahcKEwioxL-n2oT9AhUAAAAAHQAAAAAQBA&utm_campaign=sharing&utm_medium=link&utm_source=htls&ts=CAESCgoCCAMKAggDEAAaIAoCGgASGhIUCgcI5w8QAhgJEgcI5w8QAhgKGAEyAhAAKgkKBToDVVNEGgA'

    # Load and scroll the page first, then extract the listings from the parsed HTML.
    page = scroll_page(search_url)
    hotels = scrape_google_hotels(page)

    # ensure_ascii=False keeps non-ASCII characters readable in the printed JSON.
    serialized = json.dumps(hotels, indent=2, ensure_ascii=False)
    print(serialized)


# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
Preparation
Install libraries:
pip install selenium selenium-stealth webdriver-manager selectolax
Code Explanation
Import libraries:
from selenium import webdriver
from selenium_stealth import stealth
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selectolax.lexbor import LexborHTMLParser
import time, json
Library | Purpose |
---|---|
webdriver |
to drive a browser natively, as a user would, either locally or on a remote machine using the Selenium server. |
selenium-stealth |
to prevent almost all website detections (CAPTCHA and so on). |
Service |
to manage the starting and stopping of the ChromeDriver. |
By |
a set of supported locator strategies (By.ID, By.TAG_NAME, By.XPATH, etc.). |
Keys |
to simulate button presses (Keys.PAGE_UP , Keys.PAGE_DOWN , Keys.END etc). |
LexborHTMLParser |
a fast HTML5 parser with CSS selectors using Lexbor engine. |
time |
to work with time in Python. |
json |
to convert extracted data to a JSON object. |
The next part of the code is divided into functions. Each function is described in the corresponding heading below.
Top-level code environment
The URL variable contains a link to the Google Hotels:
URL = 'https://www.google.com/travel/hotels?q=hotels%20in%20tulsa%20oklahoma&gsas=1&rp=CgpYAGAAcgQIAhgAOAGoAgA&ved=0CAAQ5JsGahcKEwioxL-n2oT9AhUAAAAAHQAAAAAQBA&utm_campaign=sharing&utm_medium=link&utm_source=htls&ts=CAESCgoCCAMKAggDEAAaIAoCGgASGhIUCgcI5w8QAhgJEgcI5w8QAhgKGAEyAhAAKgkKBToDVVNEGgA'
Next, the URL is passed to the scroll_page(URL)
function to start selenium in stealth mode and scroll page. The parser returned by this function is passed to the scrape_google_hotels(parser)
function to extract all the data. The explanation of these functions will be in the corresponding headings below.
parser = scroll_page(URL)
google_hotels = scrape_google_hotels(parser)
After all the data is retrieved, it is output in JSON format:
print(json.dumps(google_hotels, indent=2, ensure_ascii=False))
This code uses boilerplate __name__ == "__main__"
construct that protects users from accidentally invoking the script when they didn't intend to. This indicates that the code is a runnable script:
def main():
    """Scrape the sample "hotels in tulsa oklahoma" search and print the result as JSON."""
    search_url = 'https://www.google.com/travel/hotels?q=hotels%20in%20tulsa%20oklahoma&gsas=1&rp=CgpYAGAAcgQIAhgAOAGoAgA&ved=0CAAQ5JsGahcKEwioxL-n2oT9AhUAAAAAHQAAAAAQBA&utm_campaign=sharing&utm_medium=link&utm_source=htls&ts=CAESCgoCCAMKAggDEAAaIAoCGgASGhIUCgcI5w8QAhgJEgcI5w8QAhgKGAEyAhAAKgkKBToDVVNEGgA'

    # Load and scroll the page first, then extract the listings from the parsed HTML.
    page = scroll_page(search_url)
    hotels = scrape_google_hotels(page)

    # ensure_ascii=False keeps non-ASCII characters readable in the printed JSON.
    serialized = json.dumps(hotels, indent=2, ensure_ascii=False)
    print(serialized)


# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
This condition is true only when the file is run directly as a script. If the file is imported into another module, the condition is false and main() is not called.
You can watch the video Python Tutorial: if name == 'main' for more details.
Scroll page
The function takes the URL and returns a parser.
First, let's understand how pagination works on the Google Hotels page. Data does not load immediately. If the user needs more data, they simply scroll the page and the site downloads another small batch of data.
In this case, selenium
library is used, which allows you to simulate user actions in the browser. For selenium
to work, you need to use ChromeDriver
, which can be downloaded manually or using code. In our case, the second method is used. To control the start and stop of ChromeDriver
, you need to use Service
which will install browser binaries under the hood:
service = Service(ChromeDriverManager().install())
You should also add options
to work correctly:
options = webdriver.ChromeOptions()
options.add_argument('start-maximized')
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_experimental_option('excludeSwitches', ['enable-automation'])
options.add_experimental_option('useAutomationExtension', False)
Now we can start webdriver
:
driver = webdriver.Chrome(service=service, options=options)
After starting the webdriver
, you need to pass it and other attributes to the stealth()
function. This will make selenium more stealthy to be able to bypass CAPTCHA (Cloudflare one also):
stealth(
driver,
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
languages=['en-US', 'en'],
vendor='Google Inc.',
platform='Win32',
webgl_vendor='Intel Inc.',
renderer='Intel Iris OpenGL Engine',
fix_hairline=True,
)
Pass the URL to the get()
method and wait a few seconds for the site to load:
driver.get(url)
time.sleep(2)
In order to load all the data, you need to implement page scrolling. By scrolling down the page, you can notice that new data is not being loaded. In order for the new data to load, you need to scroll up the page a little and wait. Yes, I'm also surprised that it works that way 🙂
The GIF below shows what it looks like:
The page scrolling algorithm looks like this:
1. Find out the initial page height and write the result to the old_height variable.
2. Scroll to the bottom of the page using the script.
3. Simulate a "Page Up" key press to scroll up a little on the page so that new data is loaded.
4. Find out the new page height and write the result to the new_height variable.
5. If new_height and old_height are equal, the algorithm is complete; otherwise, write the value of new_height to old_height and return to step 2.
Getting the page height and scroll is done by pasting the JavaScript code into the execute_script()
method:
# 1 step
old_height = driver.execute_script("""
function getHeight() {
return document.querySelector('.t8aeve').scrollHeight;
}
return getHeight();
""")
body = driver.find_element(By.TAG_NAME, 'body')
while True:
# 2 step
driver.execute_script("window.scrollTo(0, document.querySelector('.t8aeve').scrollHeight)")
time.sleep(1)
# 3 step
body.send_keys(Keys.PAGE_UP)
time.sleep(2)
# 4 step
new_height = driver.execute_script("""
function getHeight() {
return document.querySelector('.t8aeve').scrollHeight;
}
return getHeight();
""")
# 5 step
if new_height == old_height:
break
old_height = new_height
After all the data has been loaded, you need to process the HTML using selectolax
because it has Lexbor
parser which is incredibly fast, like 186% faster compared to bs4
with lxml
backend when parsing data with 3000 iterations 5 times. Please note that selectolax
does not currently support XPath:
parser = LexborHTMLParser(driver.page_source)
After all the operations are done, the driver is stopped and the parser
is returned:
driver.quit()
return parser
The function looks like this:
def scroll_page(url):
    """Load ``url`` in headless Chrome, scroll until the page stops growing, and parse it.

    Args:
        url: Address of the Google Hotels results page to open.

    Returns:
        A ``LexborHTMLParser`` built from the page source captured after
        scrolling has finished.
    """
    # The scrollHeight of the '.t8aeve' element is both the scroll target and
    # the stop signal: once it no longer changes, nothing new was loaded.
    height_script = "return document.querySelector('.t8aeve').scrollHeight;"

    service = Service(ChromeDriverManager().install())

    options = webdriver.ChromeOptions()
    options.add_argument('start-maximized')
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_experimental_option('excludeSwitches', ['enable-automation'])
    options.add_experimental_option('useAutomationExtension', False)

    driver = webdriver.Chrome(service=service, options=options)

    # try/finally guarantees the browser is shut down even when a script or
    # element lookup raises; otherwise the headless Chrome process would leak.
    try:
        stealth(
            driver,
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
            languages=['en-US', 'en'],
            vendor='Google Inc.',
            platform='Win32',
            webgl_vendor='Intel Inc.',
            renderer='Intel Iris OpenGL Engine',
            fix_hairline=True,
        )

        driver.get(url)
        time.sleep(2)  # give the page time to render the first results

        old_height = driver.execute_script(height_script)
        body = driver.find_element(By.TAG_NAME, 'body')

        while True:
            # Jump to the bottom, then nudge the page up: the article notes
            # that new results only load after scrolling up a little.
            driver.execute_script("window.scrollTo(0, document.querySelector('.t8aeve').scrollHeight)")
            time.sleep(1)
            body.send_keys(Keys.PAGE_UP)
            time.sleep(2)

            new_height = driver.execute_script(height_script)
            if new_height == old_height:
                break
            old_height = new_height

        return LexborHTMLParser(driver.page_source)
    finally:
        driver.quit()
Scrape Google Hotels Listings
The function takes a Lexbor
parser and returns the extracted data.
The data
list is declared to which the extracted data will be added:
data = []
In order to find all the hotels, you need to use the css()
method and pass the .uaTTDe
selector there. For each hotel, the result_dict
dictionary is created, which will be supplemented with all the data about this hotel:
for result in parser.root.css('.uaTTDe'):
result_dict = {}
First of all, you need to check if this is an advertisement. If so, then the key with the corresponding value is added to the result_dict
dictionary:
if result.css_first('.hVE5 .ogfYpf'):
result_dict['ad'] = result.css_first('.hVE5 .ogfYpf').text().replace(' ', ' ')
Data such as title
and link
are easily retrieved:
result_dict['title'] = result.css_first('.QT7m7 h2').text()
result_dict['link'] = 'https://www.google.com' + result.css_first('.PVOOXe').attributes.get('href')
Code | Explanation |
---|---|
css_first() |
to find the desired element. |
text() |
to retrieve the text content. |
attributes |
to get all attributes of an element. |
Sometimes it happens that in some hotels there is no such data as price
and rating
. So a ternary expression is used for such cases:
price = result.css_first('.OxGZuc .kixHKb span')
result_dict['price'] = price.text() if price else None
rating = result.css_first('.FW82K .KFi5wf')
result_dict['rating'] = float(rating.text()) if rating else None
When retrieving reviews
the user gets an element like this:
<span class="jdzyld XLC8M" aria-hidden="true"> (1,602)</span>
In order to extract a numeric value, you need to do the following operations: slice the string to remove the leading space and the parentheses, then remove the comma.
reviews = result.css_first('.FW82K .jdzyld')
result_dict['reviews'] = int(reviews.text()[2:-1].replace(',', '')) if reviews else None
Hotels may have a different number of extensions
. Sometimes they may not exist at all. Therefore, when extracting them, list comprehension is used:
result_dict['extensions'] = [extension.css_first('.sSHqwe').text() for extension in result.css('.RJM8Kc .HlxIlc div, li')]
When extracting thumbnails
, I noticed that the required value is stored in different attributes. Therefore, when extracting thumbnails
, we first check whether there is an attribute src
. If this attribute exists, then retrieve its value. Otherwise, we extract the value from the data-src
attribute:
result_dict['thumbnails'] = [
thumbnail.attributes.get('src') if thumbnail.attributes.get('src') else thumbnail.attributes.get('data-src')
for thumbnail in result.css('.NBZP0e .q5P4L')
]
Add result_dict
dictionary with all extracted data to the data
list:
data.append(result_dict)
At the end of the function, the data
list is returned:
return data
The function looks like this:
def _extract_price(text):
    """Return the first number found in ``text`` as a float, or None if there is none.

    Leading characters such as a currency symbol are skipped, commas inside
    the number are treated as thousands separators, and scanning stops at the
    first character after the number, so both '$174' and '107 USD' parse.
    """
    collected = []
    for char in text:
        if char in '0123456789':
            collected.append(char)
        elif not collected:
            continue  # still skipping a prefix such as '$'
        elif char == ',':
            continue  # thousands separator inside the number
        elif char == '.' and '.' not in collected:
            collected.append(char)
        else:
            break  # the number has ended
    number = ''.join(collected).rstrip('.')
    return float(number) if number else None


def scrape_google_hotels(parser):
    """Extract hotel listings from a parsed Google Hotels results page.

    Args:
        parser: Parsed page exposing ``root.css()``, e.g. the
            ``LexborHTMLParser`` returned by ``scroll_page``.

    Returns:
        A list with one dict per ``.uaTTDe`` element. Each dict has the keys
        ``title``, ``link``, ``price``, ``extracted_price``, ``rating``,
        ``reviews``, ``extensions`` and ``thumbnails``; ``ad`` is present only
        when the advertisement label is found. Missing values are ``None``.
    """
    data = []

    for result in parser.root.css('.uaTTDe'):
        result_dict = {}

        ad = result.css_first('.hVE5 .ogfYpf')
        if ad:
            # Explicit escape so the non-breaking space being replaced is
            # visible in the source (presumably what the label contains).
            result_dict['ad'] = ad.text().replace('\xa0', ' ')

        title = result.css_first('.QT7m7 h2')
        result_dict['title'] = title.text() if title else None

        link = result.css_first('.PVOOXe')
        href = link.attributes.get('href') if link else None
        result_dict['link'] = ('https://www.google.com' + href) if href else None

        price = result.css_first('.OxGZuc .kixHKb span')
        price_text = price.text() if price else None
        result_dict['price'] = price_text
        # Prices appear both as '107 USD' and as '$174', so the number cannot
        # be taken from the first whitespace-separated token.
        result_dict['extracted_price'] = _extract_price(price_text) if price_text else None

        rating = result.css_first('.FW82K .KFi5wf')
        result_dict['rating'] = float(rating.text()) if rating else None

        reviews = result.css_first('.FW82K .jdzyld')
        # Text looks like ' (1,602)': drop the leading ' (' and trailing ')'.
        result_dict['reviews'] = int(reviews.text()[2:-1].replace(',', '')) if reviews else None

        extensions = []
        for extension in result.css('.RJM8Kc .HlxIlc div, li'):
            label = extension.css_first('.sSHqwe')
            if label:  # skip matched elements that carry no label node
                extensions.append(label.text())
        result_dict['extensions'] = extensions

        # The image URL is stored in 'src' or, failing that, in 'data-src'.
        result_dict['thumbnails'] = [
            thumbnail.attributes.get('src') or thumbnail.attributes.get('data-src')
            for thumbnail in result.css('.NBZP0e .q5P4L')
        ]

        data.append(result_dict)

    return data
Output
[
{
"ad": "From SpringHill Suites by Marriott Tulsa",
"title": "SpringHill Suites by Marriott Tulsa",
"link": "https://www.google.com/travel/hotels/entity/CgoIsq_EvdysoNAlEAEagAFBQUJuQjNtSE1LSm45bGJ0OGFEZ1gtRnhVbnhsVkFtN3Z4cHk3cG5QWmhYX1NCdU9lbHd1V3poM0JFNVpHbGdpeGRXU2xnbUZsSkxUZDJ6eno3MllScFJmOUlodVFPNEFwcDA5TDJMOVFYN0RiVi1CZDZwRkhENmptYURQX0tRbA?q=hotels%20in%20tulsa%20oklahoma&gsas=1&rp=CgpYAGAAcgQIAhgAOAGoAgA&ved=2ahUKEwiPtOao34T9AhUZ1xEIHRoyAsoQyvcEegQIAxAw&utm_campaign=sharing&utm_medium=link&utm_source=htls&ts=CAESCgoCCAMKAggDEAAaIAoCGgASGhIUCgcI5w8QAhgJEgcI5w8QAhgKGAEyAhAAKgkKBToDVVNEGgA&ap=EgRDS2dCMANapwMKBQjwLhAAIgNVQUgqFgoHCOcPEAIYCRIHCOcPEAIYChgBKACwAQBYAWAAcgQIAhgAogESCgkvbS8wMTNrY3YSBVR1bHNhqgEXCgIIEhIDCJsBEgIIaBICCGsSAghuGAGqASwKAggUEgIIPxICCB0SAggbEgMImAESAggwEgIIUhICCE4SAggXEgMIpQIYAaoBBwoDCJwBGACqASMKAggcEgMIlwESAghREgIIWBICCHMSAghHEgIIJBICCE0YAaoBDgoCCCUSAgh5EgIIehgBqgEeCgIIERICCH0SAgg0EgIIQBICCDgSAghXEgIIKxgBqgErCgIILhICCDwSAgg7EgIIVhICCDoSAgg9EgMIgwESAghLEgIIUxICCCgYAaoBBgoCCCwYAKoBCwoDCOECEgIIYxgBqgEKCgIIUBICCBkYAaoBBgoCCAQYAKoBBgoCCAoYAKoBIgoCCDUSAggeEgIIExICCAsSAggxEgIIMhICCF0SAghEGAGSAgIIEJICAggSkgICCBOSAgIIDZICAggRkgICCA-SAgIIDJICAggUkgECIAE",
"price": "107 USD",
"rating": 4.0,
"reviews": 428,
"extensions": [
"3-star hotel",
"Free breakfast",
"Free Wi-Fi",
"Free parking",
"Indoor pool",
"Air conditioning",
"Fitness center",
"Accessible",
"Business center"
],
"thumbnails": [
"https://lh6.googleusercontent.com/proxy/npSnFI0zPjhU7MB5eo7Bl4y7FufLgyU8A_fsErIYsi1f0Va_aYbqw5jMKZxc0_ShXxvOVxy-1Dz8DnY4JEyL6ixaUe8j7YfdJdoUGQl4DA8XcB1YB_zmxZxiUaR7RcUlNldKHzKfFfy6p35kX2_ftWvHE2zvREQ=w592-h404-n-k-no-v1",
"https://lh4.googleusercontent.com/proxy/zOawslbUt33W_gbP_4SeVbDmtYKUkISF1i2By_w7PJSys9TgI7zlGSrYyA8Cc1K-h4iyqyF-s7sxs8GHzWY0_uBqYG_sFqhpfHl5Hss_br0cNF93VlauapN_gCE92QMhzscWvnXF0HDO13K3hRUYQHeXTO7SHlo=w592-h404-n-k-no-v1",
"https://lh5.googleusercontent.com/proxy/P79WRpA9In1qfrl5tnjRTxY0_UyJhdI2Wp4tIQHSjT47uo0tuN-KKizhTUUMmXnseA3SeootIW1nV7zXbZpnQjuYmvRALMiYtFpoMJDj3VL8xtAY3w4gcsjRVFsbUcxY5WTGYMK0NdF7FC5a4CQvjoFZTpoTIVE=w592-h404-n-k-no-v1",
"https://lh5.googleusercontent.com/p/AF1QipO99f-u7Cjgv_GSweNP4GQ6jLVAz__pe9KQV-7w=w592-h404-n-k-no-v1",
"https://lh4.googleusercontent.com/proxy/3UrQRyhdntDx_ak23X4I9L-nSBDckWMXZIjuDWOpxLgWE0fY16sZujVoPhImY5nL-cvCoTlpQf5xMc1JFLgSBZfMARHdAdGxBTXVqCWmw26GcChdBgzC3TYat_zhNtEnexvos_40pdzp1yvPi_Rg3Hy8qfBz2w=w592-h404-n-k-no-v1",
"https://lh6.googleusercontent.com/proxy/iGLJat0Rz_HP8N-EX0AOG2f28yERdh2hxjJaZCWEgH1Rr0yh7ZIHDBhFBtd9KLyx-n4kw7M-ewIs3JOhfHtyXl1D02Kc8l_WCiTHp0JIkG83PuMwgqb5zmSCkxZ6FkE02HFYSw4d16z6w-Jinwxgn1c1qLcRHMg=w592-h404-n-k-no-v1"
]
},
{
"ad": "From Candlewood Suites Tulsa Hills - Jenks, an IHG Hotel",
"title": "Candlewood Suites Tulsa Hills - Jenks, an IHG Hotel",
"link": "https://www.google.com/travel/hotels/entity/CgoIqLmTiPOD-rp1EAEae0FBQm5CM25SczRoM2drV1JJYnJ0U0dXcmduZk5mTEl3Sm1fNkpqZEJWODBmeUFhVlQ3T0ZLWEhXMTVOQTRFYnhaSzc4dE1iWlZPYU1sd2l6b3VZZnhjcjVXblZGU1JLc2RkXzFyUkhUZ3A3c1NRWWRGTUxKMGNHN21XWQ?q=hotels%20in%20tulsa%20oklahoma&gsas=1&rp=CgpYAGAAcgQIAhgAOAGoAgA&ved=2ahUKEwiPtOao34T9AhUZ1xEIHRoyAsoQyvcEegQIAxBG&utm_campaign=sharing&utm_medium=link&utm_source=htls&ts=CAESCgoCCAMKAggDEAAaIAoCGgASGhIUCgcI5w8QAhgJEgcI5w8QAhgKGAEyAhAAKgkKBToDVVNEGgA&ap=EgRDS2dCMANapwMKBQjwLhAAIgNVQUgqFgoHCOcPEAIYCRIHCOcPEAIYChgBKACwAQBYAWAAcgQIAhgAogESCgkvbS8wMTNrY3YSBVR1bHNhqgEXCgIIEhIDCJsBEgIIaBICCGsSAghuGAGqASwKAggUEgIIPxICCB0SAggbEgMImAESAggwEgIIUhICCE4SAggXEgMIpQIYAaoBBwoDCJwBGACqASMKAggcEgMIlwESAghREgIIWBICCHMSAghHEgIIJBICCE0YAaoBDgoCCCUSAgh5EgIIehgBqgEeCgIIERICCH0SAgg0EgIIQBICCDgSAghXEgIIKxgBqgErCgIILhICCDwSAgg7EgIIVhICCDoSAgg9EgMIgwESAghLEgIIUxICCCgYAaoBBgoCCCwYAKoBCwoDCOECEgIIYxgBqgEKCgIIUBICCBkYAaoBBgoCCAQYAKoBBgoCCAoYAKoBIgoCCDUSAggeEgIIExICCAsSAggxEgIIMhICCF0SAghEGAGSAgIIEJICAggSkgICCBOSAgIIDZICAggRkgICCA-SAgIIDJICAggUkgECIAE",
"price": "107 USD",
"rating": 4.5,
"reviews": 32,
"extensions": [
"2-star hotel",
"Free Wi-Fi",
"Free parking",
"Outdoor pool",
"Air conditioning",
"Pet-friendly",
"Fitness center",
"Bar",
"Restaurant"
],
"thumbnails": [
"https://lh5.googleusercontent.com/p/AF1QipNfsuPQsi_AU4VMrL-1GV6Mg2Xv3UU_GGsX-rJy=w592-h404-n-k-no-v1",
"https://lh5.googleusercontent.com/p/AF1QipPaGhgG0rfakDzyPsjRwrKVRTb2FJjE97yZxMdH=w592-h404-n-k-no-v1",
"https://lh4.googleusercontent.com/proxy/Yz8RTylxuiUnuDEQgzUkh9I0EkFdUGBYn7cyHbrG-mD65GGB0YtPAVoC9QWp-9Fa6bRR29Bc3e-INVDBax9h6SVVGwUSF2KHh3nU56NbdFB4LQ5DDho8B5Up-JDZJ4BW1ESoehQQ-cxzX4PSj4JIBDZpu-bjSfU=w592-h404-n-k-no-v1",
"https://lh5.googleusercontent.com/p/AF1QipO7qih8wFIYOmU7ovsMhi8Hj03XyvDfWQb93OpQ=w592-h404-n-k-no-v1",
"https://lh5.googleusercontent.com/p/AF1QipP-qzQMCQcxIwdLybDudgA7mOdc2yD-Eut4WnKR=w592-h404-n-k-no-v1",
"https://lh5.googleusercontent.com/p/AF1QipNdAZiBXI-ZCRAFAx95gL_dZoIiINeiSFj6FQUC=w592-h404-n-k-no-v1"
]
},
{
"title": "Hyatt Regency Tulsa Downtown",
"link": "https://www.google.com/travel/hotels/entity/ChYIgefFrp_A-5tJGgovbS8wcGRkbGtxEAE?q=hotels%20in%20tulsa%20oklahoma&gsas=1&rp=CgpYAGAAcgQIAhgAOAGoAgA&ved=2ahUKEwiPtOao34T9AhUZ1xEIHRoyAsoQyvcEegQIAxBc&utm_campaign=sharing&utm_medium=link&utm_source=htls&ts=CAESCgoCCAMKAggDEAAaIAoCGgASGhIUCgcI5w8QAhgJEgcI5w8QAhgKGAEyAhAAKgkKBToDVVNEGgA&ap=EgRDS2dCMANapwMKBQjwLhAAIgNVQUgqFgoHCOcPEAIYCRIHCOcPEAIYChgBKACwAQBYAWAAcgQIAhgAogESCgkvbS8wMTNrY3YSBVR1bHNhqgEXCgIIEhIDCJsBEgIIaBICCGsSAghuGAGqASwKAggUEgIIPxICCB0SAggbEgMImAESAggwEgIIUhICCE4SAggXEgMIpQIYAaoBBwoDCJwBGACqASMKAggcEgMIlwESAghREgIIWBICCHMSAghHEgIIJBICCE0YAaoBDgoCCCUSAgh5EgIIehgBqgEeCgIIERICCH0SAgg0EgIIQBICCDgSAghXEgIIKxgBqgErCgIILhICCDwSAgg7EgIIVhICCDoSAgg9EgMIgwESAghLEgIIUxICCCgYAaoBBgoCCCwYAKoBCwoDCOECEgIIYxgBqgEKCgIIUBICCBkYAaoBBgoCCAQYAKoBBgoCCAoYAKoBIgoCCDUSAggeEgIIExICCAsSAggxEgIIMhICCF0SAghEGAGSAgIIEJICAggSkgICCBOSAgIIDZICAggRkgICCA-SAgIIDJICAggUkgECIAE",
"price": "$174",
"rating": 4.3,
"reviews": 2755,
"extensions": [
"4-star hotel",
"Breakfast ($)",
"Free Wi-Fi",
"Parking ($)",
"Pools",
"Air conditioning",
"Pet-friendly",
"Fitness center",
"Spa"
],
"thumbnails": [
"https://lh5.googleusercontent.com/p/AF1QipPrPKhl9HabLa_NGsOtbimCikCZWsJcDB29icgS=w296-h202-n-k-no-v1",
"https://lh5.googleusercontent.com/p/AF1QipPBoWJnCSqgupKMQ5hZxeLuMjUm6JvcHBecSVT2=w296-h202-n-k-no-v1",
"https://lh5.googleusercontent.com/p/AF1QipOsNY1x2kyhAWzE5dxjo4ZrZAg6-pXTscTP675-=w296-h202-n-k-no-v1",
"https://lh5.googleusercontent.com/p/AF1QipOOPTxY6Mxtyr5A3jS3mJ2kzClrTVtkirjr7vXS=w296-h202-n-k-no-v1",
"https://lh5.googleusercontent.com/p/AF1QipMCzxsTCjtsWJLu8w4BB-Nh9fh1dedKJ9G2pZXk=w296-h202-n-k-no-v1",
"https://lh5.googleusercontent.com/p/AF1QipNrQC1BObMJw6w_QHnnmF3M-WOYsh1JmEwW4mX4=w296-h202-n-k-no-v1"
]
},
... other results
]
Top comments (2)
Heyo Artur - super comprehensive tutorial, thanks for sharing! :D
I've never used
selenium-stealth
before, def going to check that out next time I'm working on a scraping project 😎Thanks for the feedback 🙂
I recently discovered selenium-stealth for myself and was pleased with its capabilities. Now I also use it for my projects.