最新消息:

Python采集工具,品拉索产品收录查询源码

Python与SEO 追逐 183浏览 0评论

Python采集工具,品拉索产品收录查询源码

python

python

#品拉索产品收录查询

# -*- coding: UTF-8 -*-
import requests
import re,time
from fake_useragent import UserAgent


#随机协议头
def ua():
    """Build request headers carrying a randomly picked User-Agent string.

    Returns:
        dict: A one-entry mapping ``{"User-Agent": <value of UserAgent().random>}``.
    """
    # A fresh UserAgent object is created on every call.
    return {"User-Agent": UserAgent().random}

#获取链接
def get_urls(timeout=10):
    """Collect the product links listed on the pinlasuo design page.

    Downloads the listing page, extracts every href captured by the
    listing regex and prefixes each one with the site root.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        list[str]: Product URLs, in the order they appear in the page.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    url = "http://www.pinlasuo.com/design.html"
    # Without a timeout, requests blocks indefinitely on a stalled server.
    response = requests.get(url, headers=ua(), timeout=timeout).text
    cpze = r'<div class="media__body">.+? <div class="pinsuo-border">.+?<a href="(.+?)"><p class="pinsuo-icon1"></p></a>'
    # re.S lets ".+?" span line breaks between the matched tags.
    hrefs = re.findall(cpze, response, re.S)
    urls = [f'http://www.pinlasuo.com{href}' for href in hrefs]
    print(len(urls))
    print(urls)
    return urls

#收录查询
def bdcx(url, timeout=10):
    """Query Baidu for ``url`` and record whether it appears to be indexed.

    The URL is treated as not indexed when the marker text "没有找"
    occurs in the returned page, and as indexed otherwise. The outcome is
    appended to three files in the current directory:

    * ``wslplsslcx.csv`` (not indexed) or ``yslplsslcx.csv`` (indexed):
      the bare URL.
    * ``plsslcx.csv``: the URL with its status label.
    * ``plssscx.txt``: the same labelled line, UTF-8 encoded.

    Args:
        url: The page URL to look up.
        timeout: Seconds to wait for Baidu before giving up.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    # Passing the URL through ``params`` percent-encodes it, so characters
    # such as "&" or "#" inside ``url`` cannot break the ``wd`` query value.
    response = requests.get(
        "https://www.baidu.com/s",
        params={"wd": url},
        headers=ua(),
        timeout=timeout,
    ).text

    if "没有找" in response:
        sljg = f'{url}--×未收录'
        status_path = 'wslplsslcx.csv'
    else:
        sljg = f'{url}--√已收录'
        status_path = 'yslplsslcx.csv'

    # ``with`` closes each file promptly; the handles were previously leaked.
    with open(status_path, 'a+') as status_file:
        status_file.write('%s\n' % url)

    print(sljg)
    # Pause between lookups (one second per URL).
    time.sleep(1)

    # Combined log as CSV (platform default encoding is kept unchanged).
    with open('plsslcx.csv', 'a+') as opencsv:
        opencsv.write('%s\n' % sljg)
    # Combined log as UTF-8 text.
    with open('plssscx.txt', 'a+', encoding='utf-8') as f:
        f.write(f'{sljg}\n')


#收录数据分析
def fx():
    """Summarise the results stored in ``plssscx.txt``.

    Reads the UTF-8 log from the current directory, counts the lines that
    contain "已收录" (indexed) versus all other non-blank lines, and
    prints the records, the totals ``total indexed not_indexed`` and the
    indexed percentage truncated to a whole number.

    A missing or empty log is reported as 0% instead of raising.
    """
    try:
        with open('plssscx.txt', encoding='utf-8') as f:
            # Blank lines are skipped so they are not counted as "not indexed".
            data = [line.rstrip('\n') for line in f if line.strip()]
    except FileNotFoundError:
        # No URL has been checked yet, so there is nothing to analyse.
        data = []

    i = len(data)
    n = sum("已收录" in line for line in data)
    m = i - n
    print(data)
    print(i, n, m)
    # Integer arithmetic avoids float truncation (29/100*100 is 28.99...,
    # which int() would turn into 28) and the guard avoids dividing by zero.
    sll = n * 100 // i if i else 0
    print(f'收录率:{sll}%')




if __name__ == '__main__':
    # Check every scraped product URL against Baidu, then print the summary.
    for product_url in get_urls():
        bdcx(product_url)
    fx()

转载请注明:二爷记 » Python采集工具,品拉索产品收录查询源码

发表我的评论
取消评论
表情

Hi,您需要填写昵称和邮箱!

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址