from urllib.request import Request, urlopen
from bs4 import BeautifulSoup as soup
from pathlib import Path
import csv

base_url = 'https://www.foodnetwork.com/restaurants/shows/diners-drive-ins-and-dives/a-z/p/'
# Scrape one listing page and append one CSV row per restaurant
def scraper(some_url, csv_file):
    # Progress: show which page is being fetched
    print(some_url)
    # Fetch the page with a browser-like User-Agent so the request
    # is less likely to be rejected as a bot
    page = Request(some_url, headers={'User-Agent': 'Mozilla/5.0'})
    my_page = urlopen(page)
    dump = my_page.read()
    # Close that page
    my_page.close()
    # Parse the raw HTML
    my_soup = soup(dump, "html.parser")
    # Collect each restaurant's outer text block; the web link is at
    # text_container[i].a["href"]
    text_container = my_soup.find_all("div", {"class": "m-MediaBlock__m-TextWrap"})
    # Name container; access the name text as name_container[i].text
    name_container = my_soup.find_all(
        "span", {"class": "m-MediaBlock__a-HeadlineText"})
    # Address container; access the address text as address_container[i].text
    address_container = my_soup.find_all("div", {"class": "m-Info__a-Address"})
    # Description container; access the text as description_container[i].text
    description_container = my_soup.find_all(
        "div", {"class": "m-MediaBlock__a-Description"})
    # Write one row per restaurant to the open file handle passed in
    csv_writer = csv.writer(csv_file, delimiter=',')
    for i in range(len(address_container)):
        # Keep only the first sentence of the description
        description = description_container[i].text.split('.')[0] + '.'
        line = [
            name_container[i].text.strip(),     # Name
            address_container[i].text.strip(),  # Address
            text_container[i].a["href"],        # Website
            description.strip(),                # Description
        ]
        csv_writer.writerow(line)
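
# A minimal smoke-test sketch for scraper(), assuming the site is reachable
# and the CSS class names above are still current. io.StringIO stands in for
# the real file so nothing touches disk; the helper name and the page number
# are illustrative, not part of the original script.
def _demo_single_page():
    import io
    buf = io.StringIO()
    scraper(base_url + '1', buf)
    print(buf.getvalue())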

def main():
    # Open the output file and write the header row
    with open(Path.cwd() / 'restaurants.csv', 'w', newline='', encoding='utf-8') as f:
        f.write("name,address,website,description\n")
        # Loop through all pages (/p/1 through /p/82)
        for p in range(1, 83):
            scraper(base_url + str(p), f)
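
# urlopen() raises on any non-2xx response, so one flaky page aborts the whole
# run. A hedged sketch of a more defensive fetch, assuming a single retry with
# a short pause is acceptable; fetch_with_retry and its defaults are
# illustrative, not part of the original script.
def fetch_with_retry(url, retries=1, pause=5.0):
    import time
    from urllib.error import URLError
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    for attempt in range(retries + 1):
        try:
            with urlopen(req) as resp:
                return resp.read()
        except URLError:
            if attempt == retries:
                raise
            time.sleep(pause)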

if __name__ == "__main__":
    main()
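
# A hedged sketch of reading the finished CSV back: csv.DictReader keys each
# row by the header row written in main(). Assumes restaurants.csv already
# exists in the current directory; _demo_read_back is illustrative only.
def _demo_read_back():
    with open(Path.cwd() / 'restaurants.csv', newline='', encoding='utf-8') as fh:
        for row in csv.DictReader(fh):
            print(row['name'], '->', row['address'])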