Database/MongoDB
MongoDB와 Python 연동 (동적 웹 크롤링)
sabeom
2023. 2. 6. 09:55
동적 웹페이지 크롤링
- 관련 모듈 임포트
from selenium import webdriver from selenium.webdriver.chrome.service import Service from webdriver_manager.chrome import ChromeDriverManager from bs4 import BeautifulSoup import time import re #정규표현식
- 크롤링할 부분 설정
# Fetch the rendered DiningCode list page with a real Chrome instance and
# pick out the store-name, score and user-score elements from the HTML.
dining_url = "https://m.diningcode.com/list.dc"
chrome_options = webdriver.ChromeOptions()

# Create the Chrome WebDriver object; the driver binary path returned by
# ChromeDriverManager().install() is handed to Service.
wd = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=chrome_options,
)
try:
    wd.get(dining_url)
    # Fixed one-second pause before reading the DOM — presumably to give the
    # page's scripts time to render the list; TODO confirm it is long enough.
    time.sleep(1)
    html = wd.page_source
finally:
    # Only the page source is needed from here on, so always shut the browser
    # down (even when loading fails) instead of leaking a Chrome process.
    wd.quit()

soup = BeautifulSoup(html, 'html.parser')
stores = soup.select('div.InfoHeader>h2')          # store-name headings
scores = soup.select('div.Rate>p.Score')           # score paragraphs
userScores = soup.select('div.Rate>p.UserScore')   # user-score paragraphs
- store_list 만들기 (가게별 이름·점수 정보 추출)
# Build store_list: one dict per store, combining the matching entries of
# `stores`, `scores` and `userScores` selected from the page.
#
# The patterns are compiled once, outside the loop, and written as raw strings
# so that sequences such as \( and \w reach the regex engine unchanged instead
# of being treated as (invalid) string escapes.
_NON_HANGUL_RE = re.compile(r'[^\uAC00-\uD7A3\s]')   # anything but Hangul syllables / whitespace
_P_TAG_RE = re.compile(r'<p.*?>*?/>|</p>')           # markup stripped from the serialized <p> element
_PAREN_GROUP_RE = re.compile(r'\(\w*\)')             # a whole "(...)" group of word characters
_PAREN_CONTENT_RE = re.compile(r'\(([^)]+)')         # the text following an opening parenthesis

store_list = []
# zip walks the three result lists in lockstep; if they differ in length the
# extra elements are ignored rather than raising IndexError part-way through.
for store, score, user_score_tag in zip(stores, scores, userScores):
    store_dict = {}
    # NOTE(review): the key is spelled "sotre_name" (sic). It is left untouched
    # because it becomes the stored field name and existing data or queries may
    # rely on it.
    store_dict["sotre_name"] = _NON_HANGUL_RE.sub('', store.text)  # keep Korean text only
    store_dict["score"] = score.select_one('span').text

    userScore = _P_TAG_RE.sub('', str(user_score_tag))
    print('userScore', userScore)

    # The user score is what remains once the parenthesised group is removed.
    store_dict["userScore"] = _PAREN_GROUP_RE.sub('', userScore).strip()

    # The parenthesised part is stored separately; None when the text has no
    # parentheses, so one unusual entry does not abort the whole crawl.
    scorer = _PAREN_CONTENT_RE.findall(userScore)
    store_dict["scorer"] = scorer[0] if scorer else None

    store_list.append(store_dict)
- MongoDB에 데이터 넣기
import pymongo

# Store the scraped records in the `stores` collection of the `bitDB` database.
# MongoClient() is called without arguments, so the driver's default
# connection settings are used.
conn = pymongo.MongoClient()
db = conn.bitDB

# insert_many rejects an empty document list, so skip the write when the crawl
# produced no records instead of failing here.
if store_list:
    db.stores.insert_many(store_list)