Python實現(xiàn)批量采集商品數(shù)據(jù)的示例詳解
本次目的
python批量采集某商品數(shù)據(jù)
知識點
requests 發(fā)送請求
re 解析網(wǎng)頁數(shù)據(jù)
json 類型數(shù)據(jù)提取
csv 表格數(shù)據(jù)保存
開發(fā)環(huán)境
python 3.8
pycharm
requests
代碼
導(dǎo)入模塊
import json import random import time import csv import requests import re import pymysql
核心代碼
# 連接數(shù)據(jù)庫
def save_sql(title, pic_url, detail_url, view_price, item_loc, view_sales, nick):
count = pymysql.connect(
host='xxx.xxx.xxx.xxx', # 數(shù)據(jù)庫地址
port=3306, # 數(shù)據(jù)庫端口
user='xxxx', # 數(shù)據(jù)庫賬號
password='xxxx', # 數(shù)據(jù)庫密碼
db='xxxx' # 數(shù)據(jù)庫表名
)
# 創(chuàng)建數(shù)據(jù)庫對象
db = count.cursor()
# 寫入sql
sql = f"insert into goods(title, pic_url, detail_url, view_price, item_loc, view_sales, nick) values ('{title}', '{pic_url}', '{detail_url}', {view_price}, '{item_loc}', '{view_sales}', '{nick}')"
# 執(zhí)行sql
db.execute(sql)
# 保存修改內(nèi)容
count.commit()
db.close()
headers = {
'cookie': 'miid=4137864361077413341; tracknick=%5Cu5218%5Cu6587%5Cu9F9978083283; thw=cn; hng=CN%7Czh-CN%7CCNY%7C156; cna=MNI4GicXYTQCAa8APqlAWWiS; enc=%2FWC5TlhZCGfEq7Zm4Y7wyNToESfZVxhucOmHkanuKyUkH1YNHBFXacrDRNdCFeeY9y5ztSufV535NI0AkjeX4g%3D%3D; t=ad15767ffa6febb4d2a8709edebf63d3; lgc=%5Cu5218%5Cu6587%5Cu9F9978083283; sgcookie=E100EcWpAN49d4Uc3MkldEc205AxRTa81RfV4IC8X8yOM08mjVtdhtulkYwYybKSRnCaLHGsk1mJ6lMa1TO3vTFmr7MTW3mHm92jAsN%2BOA528auARfjf2rnOV%2Bx25dm%2BYC6l; uc3=nk2=ogczBg70hCZ6AbZiWjM%3D&vt3=F8dCvCogB1%2F5Sh1kqHY%3D&lg2=Vq8l%2BKCLz3%2F65A%3D%3D&id2=UNGWOjVj4Vjzwg%3D%3D; uc4=nk4=0%40oAWoex2a2MA2%2F2I%2FjFnivZpTtTp%2F2YKSTg%3D%3D&id4=0%40UgbuMZOge7ar3lxd0xayM%2BsqyxOW; _cc_=W5iHLLyFfA%3D%3D; _m_h5_tk=ac589fc01c86be5353b640607e791528_1647451667088; _m_h5_tk_enc=7d452e4e140345814d5748c3e31fc355; xlly_s=1; x5sec=7b227365617263686170703b32223a223264393234316334363365353038663531353163633366363036346635356431434c61583635454745506163324f2f6b2b2b4b6166686f4d4d7a45774e7a4d794d6a59324e4473784d4b6546677037382f2f2f2f2f77453d227d; JSESSIONID=1F7E942AC30122D1C7DBA22C429521B9; tfstk=cKKGBRTY1F71aDbHPcs6LYjFVa0dZV2F6iSeY3hEAYkCuZxFizaUz1sbK1hS_r1..; l=eBEVp-O4gnqzSzLbBOfwnurza77OIIRAguPzaNbMiOCPO75p5zbNW60wl4L9CnGVhsTMR3lRBzU9BeYBqo44n5U62j-la1Hmn; isg=BDw8SnVxcvXZcEU4ugf-vTadDdruNeBfG0WXdBa9WicK4dxrPkd97hHTxQmZqRi3',
'referer': 'https://s.taobao.com/search?q=%E4%B8%9D%E8%A2%9C&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20220323&ie=utf8&bcoffset=1&ntoffset=1&p4ppushleft=2%2C48&s=',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="99", "Google Chrome";v="99"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'document',
'sec-fetch-mode': 'navigate',
'sec-fetch-site': 'same-origin',
'sec-fetch-user': '?1',
'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.82 Safari/537.36',
}
with open('淘寶.csv', mode='a', encoding='utf-8', newline='') as f:
csv_writer = csv.writer(f)
csv_writer.writerow(['title', 'pic_url', 'detail_url', 'view_price', 'item_loc', 'view_sales', 'nick'])
for page in range(1, 101):
url = f'https://s.taobao.com/search?q=%E4%B8%9D%E8%A2%9C&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20220323&ie=utf8&bcoffset=1&ntoffset=1&p4ppushleft=2%2C48&s={44*page}'
response = requests.get(url=url, headers=headers)
json_str = re.findall('g_page_config = (.*);', response.text)[0]
json_data = json.loads(json_str)
auctions = json_data['mods']['itemlist']['data']['auctions']
for auction in auctions:
try:
title = auction['raw_title']
pic_url = auction['pic_url']
detail_url = auction['detail_url']
view_price = auction['view_price']
item_loc = auction['item_loc']
view_sales = auction['view_sales']
nick = auction['nick']
print(title, pic_url, detail_url, view_price, item_loc, view_sales, nick)
save_sql(title, pic_url, detail_url, view_price, item_loc, view_sales, nick)
with open('淘寶.csv', mode='a', encoding='utf-8', newline='') as f:
csv_writer = csv.writer(f)
csv_writer.writerow([title, pic_url, detail_url, view_price, item_loc, view_sales, nick])
except:
pass
time.sleep(random.randint(3, 5))
到此這篇關(guān)于Python實現(xiàn)批量采集商品數(shù)據(jù)的示例詳解的文章就介紹到這了,更多相關(guān)Python采集商品數(shù)據(jù)內(nèi)容請搜索腳本之家以前的文章或繼續(xù)瀏覽下面的相關(guān)文章希望大家以后多多支持腳本之家!
相關(guān)文章
基于python實現(xiàn)鼠標(biāo)實時坐標(biāo)監(jiān)測
這篇文章主要給大家介紹了如何基于python實現(xiàn)鼠標(biāo)實時坐標(biāo)監(jiān)測,文章通過代碼示例介紹的非常詳細,對大家的學(xué)習(xí)或工作有一定的幫助,需要的朋友可以參考下2023-11-11
Django中celery執(zhí)行任務(wù)結(jié)果的保存方法
今天小編就為大家分享一篇Django中celery執(zhí)行任務(wù)結(jié)果的保存方法,具有很好的參考價值,希望對大家有所幫助。一起跟隨小編過來看看吧2019-07-07

