据
相信很多买车的朋友,首先会在网上查资料、对比车型和价格,首选就是“汽车之家”。于是,今天我就给大家扒一扒汽车之家的数据。
一、汽车价格:
首先获取的数据是各款汽车的名称、价格范围以及最低指导价:
def get_oa_price(self):
    """Scrape fuel-car names, price ranges and minimum prices.

    Requests listing pages 1-26, extracts every ``(car_id, car_name)`` pair
    and its price-range text from the fuel-car block of each page, and
    collects ``(car_id, car_name, price_range, price_min)`` tuples.
    ``price_min`` is the lower bound of the range multiplied by 10000
    (presumably converting a price quoted in units of ten thousand).

    Returns:
        1 when at least one row was collected (the rows are printed),
        0 when a page request fails or no row was collected,
        ``None`` when an exception was passed to ``llect_error``.
    """
    # NOTE(review): `_html` and `llect_error` look like truncated names of
    # helpers defined elsewhere in the project; they are kept exactly as
    # written here -- confirm the real names. The URL also appears to have
    # lost its scheme/TLD during extraction.
    try:
        oa_price_data_list = []
        for page in range(1, 27):
            oa_price_api = f"price.16888/gz/search-0-0-0-0-0-0-0-0-0-1-0-0-0-0-{page}.html"
            response = _html(oa_price_api)
            if not response:
                print('城市页请求失败')
                return 0
            # NOTE(review): the text argument of the original re.findall call
            # was lost; this assumes `_html` returns either the page text or
            # a response object exposing `.text` -- confirm.
            page_text = getattr(response, "text", response)
            # Fuel-car data block; a missing block raises IndexError, which
            # is reported through the except branch below.
            oa_data_ = re.findall(
                r'<div class="style-box ">\s+<ul class="clearfix">([\s\S]*?)</ul>',
                page_text,
            )[0]
            # (id, name) pairs of the listed cars.
            car_id_name_list = re.findall(r'data-sid="(\d+)" data-name="(.*?)">', oa_data_)
            # Price-range texts. NOTE(review): the searched text was also
            # lost here; the data block is the presumed input -- confirm.
            price_range_list = re.findall(r'<p>(.*?)\s+<span class="', oa_data_)
            # Only pair the two lists up when they line up one-to-one.
            if len(car_id_name_list) != len(price_range_list):
                continue
            for (car_id, car_name), price_range in zip(car_id_name_list, price_range_list):
                # The original used eval() on scraped text; float() parses the
                # same numeric input without executing page content, and
                # round() avoids truncating binary floating-point error.
                price_min = int(round(float(price_range.split('-')[0]) * 10000))
                oa_price_data_list.append((int(car_id), car_name, price_range, price_min))
        if not oa_price_data_list:
            return 0
        print(oa_price_data_list)
        print("燃油车价格已经爬取完成")
        return 1
    except Exception:
        llect_error()
结果输出如下:
二、汽车销量:
一般买东西,尤其是网上买东西,一看价格,二看销量。销量好不好,有时候也会决定买不买:
def get_ea_sale(self):
    """Scrape monthly electric-car sales figures.

    For every month from 2018-01 through 2020-12, requests up to two result
    pages and collects ``(car_id, car_name, sale_num, date_)`` tuples, where
    ``date_`` is the ``YYYYMM`` string used in the request URL. Paging for a
    month stops as soon as a page shows the "no sales data" notice.

    Returns:
        1 when at least one row was collected (the rows are printed),
        0 when a page request fails or no row was collected,
        ``None`` when an exception was passed to ``llect_error``.
    """
    # NOTE(review): `_html` and `llect_error` look like truncated names of
    # helpers defined elsewhere in the project; they are kept exactly as
    # written here -- confirm the real names. The URL also appears to have
    # lost its scheme/TLD during extraction.
    try:
        ea_sale_data_list = []
        for year in range(2018, 2021):
            for month in range(1, 13):
                # Zero-padded YYYYMM, e.g. 201803.
                date_ = f"{year}{month:02d}"
                for i in range(1, 3):
                    ea_sale_api = f"xl.16888/ev-{date_}-{date_}-{i}.html"
                    print(ea_sale_api)
                    response = _html(ea_sale_api)
                    if not response:
                        print('城市页请求失败')
                        return 0
                    # NOTE(review): the text argument of the original
                    # re.findall calls was lost; this assumes `_html` returns
                    # either the page text or a response object exposing
                    # `.text` -- confirm.
                    page_text = getattr(response, "text", response)
                    # NOTE(review): this pattern looks damaged by extraction
                    # (the lone " ;" may be the remains of HTML entities);
                    # it is kept as written -- verify against a live page.
                    re_no = re.findall(
                        r'<p>暂时没有 <em>电动车</em> <em>\d+.\d+</em> ;的销量数据</p>',
                        page_text,
                    )
                    if re_no:
                        print("没第⼆页")
                        break
                    # Sales table block; a missing block raises IndexError,
                    # which is reported through the except branch below.
                    ea_sale_data_ = re.findall(
                        r'<th width="\w+">车型相关</th>([\s\S]*?)<div class="xl-data-pageing lbBox">',
                        page_text,
                    )[0]
                    # (id, name) pairs of the listed cars.
                    car_id_name_list = re.findall(
                        r'<td class="xl-td-\w+"><a href="/s/(\d+)/" target="_blank">(.*?)</a></td>',
                        ea_sale_data_,
                    )
                    # Sales counts, expected in the same order as the cars.
                    sale_list = re.findall(r'<td class="xl-td-t3">(\d+)</td>', ea_sale_data_)
                    # Only pair the two lists up when they line up one-to-one.
                    if len(car_id_name_list) != len(sale_list):
                        continue
                    for (car_id, car_name), sale_text in zip(car_id_name_list, sale_list):
                        ea_sale_data_list.append((int(car_id), car_name, int(sale_text), date_))
        # Nothing was collected at all.
        if not ea_sale_data_list:
            return 0
        print(ea_sale_data_list)
        print("电动车销量已经爬取完成")
        return 1
    except Exception:
        llect_error()
结果输出如下:
三、汽车评论:
俗话说:买东西看三宝,一看价格,二看销量,三看评论。
def car_comment(self):
    """Collect review-page links for every car series in the series list.

    Requests the series list as JSON, then for each entry of its ``result``
    list reads the review count from the first review page, derives the
    number of pages (looked up only when there are more than 15 reviews),
    and gathers every review link found on each page as
    ``(car_id, car_name, com_id, com_url)`` tuples. ``com_id`` is a freshly
    generated random UUID string. When a page comes back without the
    expected content, the IP-switch endpoint is called and the page is
    requested once more.

    Returns:
        1 after all series have been processed,
        0 when the series-list request fails or a series yields no links,
        ``None`` when an exception was passed to ``llect_error``.
    """
    # NOTE(review): `_html` and `llect_error` look like truncated names of
    # helpers defined elsewhere in the project; they are kept exactly as
    # written here -- confirm the real names. The URLs also appear to have
    # lost their scheme/TLD during extraction.

    def page_text(resp):
        # NOTE(review): the text arguments of the original re.findall /
        # json calls were lost; this assumes `_html` returns either the page
        # text or a response object exposing `.text` -- confirm.
        return getattr(resp, "text", resp)

    def switch_ip():
        # Presumably rotates the outgoing proxy IP; the response is unused.
        _html('ip.dobel/switch-ip')
        time.sleep(1)

    # The extracted source had Kangxi-radical look-alikes (U+2F1D) inside
    # these patterns, which can never match real page text; they are
    # restored to the ordinary CJK character here.
    com_num_pattern = r'<span class="fn-right \w+">共有(\d+)条口碑</span>'
    page_num_pattern = r"<span class='page-item-info'>共(\d+)页</span>"
    com_url_pattern = r'发表了口碑\s+<a href="(.*?)"'

    try:
        ea_com_api = "k.autohome/ajax/getSceneSelectCar?minprice=2&maxprice=110&_appid=koubei&fueltype=4"
        response = _html(ea_com_api)
        if not response:
            print('车型列表请求失败')
            return 0
        ea_com_json = json.loads(page_text(response))
        for result in ea_com_json['result']:
            ea_com_data_list = []
            car_id = int(result['SeriesId'])
            car_name = result['SeriesName']
            print(car_name)

            # First review page: carries the total review count.
            com_api = f"k.autohome/{car_id}/index_1.html"
            com_resp = _html(com_api)
            if not com_resp:
                print('⼝碑列表请求失败')
                continue
            com_num_list = re.findall(com_num_pattern, page_text(com_resp))
            if not com_num_list:
                # No count found: switch IP and try the same page once more.
                print("没有⼝碑")
                switch_ip()
                com_resp = _html(com_api)
                if not com_resp:
                    print('⼝碑列表请求失败=========')
                    continue
                com_num_list = re.findall(com_num_pattern, page_text(com_resp))
                if not com_num_list:
                    print("没有⼝碑=========")
                    continue

            # Page count is only looked up when there are more than 15 reviews.
            page_num = 1
            if int(com_num_list[0]) > 15:
                page_num_list = re.findall(page_num_pattern, page_text(com_resp))
                if page_num_list:
                    page_num = int(page_num_list[0])
                else:
                    print("没有⼝碑")

            for page in range(1, page_num + 1):
                com_api2 = f"k.autohome/{car_id}/index_{page}.html"
                print(com_api2)
                com_resp2 = _html(com_api2)
                if not com_resp2:
                    print('⼝碑列表2请求失败')
                    switch_ip()
                    com_resp2 = _html(com_api2)
                    if not com_resp2:
                        print('⼝碑列表3请求失败')
                        continue
                # Links to the individual review pages.
                com_id_url_list = re.findall(com_url_pattern, page_text(com_resp2))
                if not com_id_url_list:
                    print("没有⼝碑id")
                    switch_ip()
                    com_resp3 = _html(com_api2)
                    if not com_resp3:
                        print('⼝碑列表3请求失败========')
                        continue
                    # The extracted source had a second, unconditional
                    # `continue` here that made this retry parse unreachable;
                    # it is dropped so the retried page is actually used.
                    com_id_url_list = re.findall(com_url_pattern, page_text(com_resp3))
                    if not com_id_url_list:
                        print("没有⼝碑id======")
                        continue
                for com_url in com_id_url_list:
                    # A random UUID serves as the comment id.
                    com_id = str(uuid.uuid4())
                    ea_com_data_list.append((car_id, car_name, com_id, com_url))

            # NOTE(review): indentation was lost in the source; because the
            # list is reset per series, this check is placed inside the
            # series loop -- confirm against the original file.
            if not ea_com_data_list:
                return 0
            print(f"汽车之家{car_name}评论id已经爬取完成")
        return 1
    except Exception:
        llect_error()
以上就是我的分享,如果有什么不足之处请指出,多交流,谢谢!
想获取更多数据或定制爬虫的请私信我。
发布评论