更新时间:2022-10-15 16:29:12
Here is one way. Your mileage may vary on other pages.
This line
details = [re.sub(r'\s{2,}|[,]', '',i) for i in restuarant.select_one('h3 + p').text.strip().split('\n') if i!='']
generates the output columns (all of them except the name) by splitting the text of the p
tag on '\n' and doing a little string cleaning on each piece.
import requests, re
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
# Scrape the restaurant listing page with Selenium, parse each listing with
# BeautifulSoup, and write the collected rows to a CSV file via pandas.

# Matches runs of two or more whitespace characters, or a comma; compiled once
# because it is applied to every line of every listing.
_CLEANUP_RE = re.compile(r'\s{2,}|[,]')

# NOTE(review): `executable_path` is deprecated in Selenium 4 and removed in
# later 4.x releases; on a recent Selenium, pass a
# `selenium.webdriver.chrome.service.Service` instead -- confirm the installed
# version before changing this.
driver = webdriver.Chrome(executable_path=r"C:\Users\User\Documents\chromedriver.exe")
try:
    driver.get('https://www.restaurant.com/listing?&&st=KS&p=KS&p=PA&page=1&&searchradius=50&loc=10021')
    # Wait up to 10 seconds for elements with class "restaurants" to be present
    # before reading the page source.
    WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".restaurants")))
    soup = BeautifulSoup(driver.page_source, 'html.parser')
finally:
    # Always shut the browser down, even if the wait above times out, so no
    # Chrome/chromedriver process is left running.
    driver.quit()

restuarants = soup.select('.restaurants')
results = []
for restuarant in restuarants:
    # 'h3 + p' selects the <p> that directly follows the listing's <h3>. Its
    # text is split on newlines and each non-empty line is cleaned; the cleaned
    # lines are expected to line up with the Address..Phone columns below.
    details = [
        _CLEANUP_RE.sub('', i)
        for i in restuarant.select_one('h3 + p').text.strip().split('\n')
        if i != ''
    ]
    # The name comes from the link inside the <h3> and goes in the first column.
    details.insert(0, restuarant.select_one('h3 a').text)
    results.append(details)

df = pd.DataFrame(results, columns=['Name', 'Address', 'City', 'State', 'Zip', 'Phone'])
df.to_csv(r'C:\Users\User\Desktop\Data.csv', sep=',', encoding='utf-8-sig', index=False)