当前位置: 首页 > 后端技术 > Python

爬虫项目实践:使用 Selenium 爬取京东商品信息

时间:2023-03-26 13:49:30 Python

本爬虫爬取了京东的口罩信息,并将数据保存到 MongoDB 数据库中。

其中 config 为配置信息:

```python
MONGO_URL = 'localhost'
MONGO_DB = '京东'
MONGO_TABLE = 'mask'
```

代码如下:

```python
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from bs4 import BeautifulSoup
from config import *
import pymongo

# 本地创建数据库
client = pymongo.MongoClient(MONGO_URL)
db = client[MONGO_DB]

# 创建浏览器对象和显式等待
browser = webdriver.Chrome()
wait = WebDriverWait(browser, 10)


def search(keys):
    # 用于搜索关键词
    try:
        browser.get('https://www.jingdong.com/')
        safe_button_1 = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#details-button")))
        safe_button_1.click()
        safe_button_2 = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#proceed-link")))
        safe_button_2.click()
        search_box = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#key")))
        # 注:原文此处有内容缺失(搜索按钮选择器的前半部分以及输入关键词的语句),
        # 下面两行为根据上下文补全,选择器仅保留了原文残留的 "div.form>button"。
        button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div.form>button")))
        search_box.send_keys(keys)
        button.click()
        total = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#J_bottomPage>span.p-skip>em:nth-child(1)>b"))).text
        get_infomation()
        return total
    except TimeoutException:
        search(keys)


def swich_to_page(page_num):
    # 用于翻页
    try:
        num_box = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#J_bottomPage>span.p-skip>input")))
        next_page_button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#J_bottomPage>span.p-skip>a")))
        num_box.clear()
        num_box.send_keys(page_num)
        next_page_button.click()
        wait.until(EC.text_to_be_present_in_element((By.CSS_SELECTOR, '#J_bottomPage>span.p-num>a.curr'), str(page_num)))
    except TimeoutException:
        swich_to_page(page_num)


def get_infomation():
    # 用于解析 html 获取信息
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#J_goodsList>ul')))
    soup = BeautifulSoup(browser.page_source, 'lxml')
    products = soup.find_all('li', class_='gl-item')
    for product in products:
        p = {
            'price': product.find('div', class_='p-price').text.strip(),
            'name': product.find('div', class_='p-name').text.strip(),
            'comment': product.find('div', class_='p-commit').text.strip(),
            'shop': product.find('div', class_='p-shop').text.strip(),
            'label': product.find('div', class_='p-icons').text.strip()
        }
        save_to_mongo(p)


def save_to_mongo(result):
    # 将信息保存到数据库
    try:
        if db[MONGO_TABLE].insert_one(result):
            print('保存成功', result)
    except:
        print('storage failed', result)


def main(keys):
    try:
        total = search(keys)
        for i in range(2, int(total) + 1):
            swich_to_page(i)
            get_infomation()
            time.sleep(1)
    except Exception:
        print('An error occurred')
    finally:
        browser.close()


if __name__ == '__main__':
    main('mask')
```

最后可以在数据库中查看爬取到的信息。

猜你喜欢