欢迎光临 - 我的站长站,本站所有资源仅供学习与参考,禁止用于商业用途或从事违法行为!

python教程

Python平台热搜热文爬取代码

python教程 我的站长站 2021-08-14 共61人阅读

前言

分享一段Python爬取各大平台热搜热文信息,支持微博热搜、抖音热搜、百度实时热点、知乎热榜、虎嗅热文、哔哩哔哩全站排行、豆瓣新片,免去一个一个网站的看了,是站长编辑的福音。

提示:此代码为Python代码,需要有一点基础才能运行,如果没有基础,我的站长站推荐您直接使用下方网站。

相关信息
编辑必备 全网热门新闻源统计网站-今日热榜
编辑必备 全网热门新闻源统计网站-今日热榜

编辑朋友们,还不知道今天该更新什么内容吗?还在找热门新闻源而烦恼吗?今天给大家推荐一款在线统计全网热门新闻源的网站-今日热榜。今日热榜聚合各大网站排行前十新...

python爬虫代码

#!/usr/bin/python
# -*- coding:utf-8 -*-
# @File   : today_hot_demo.py
# @Time   : 2021/8/5 20:57
 
import sys
import threading
from PyQt5 import QtCore, QtGui, QtWidgets
from requests import get
from lxml import etree
 
 
# HTTP headers sent with every request; the User-Agent presents the scraper
# as a desktop Chrome browser.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/75.0.3770.100 Safari/537.36'
    ),
}
# Module-level lock; nothing in the visible part of this file acquires it.
lock = threading.Lock()
 
 
class Ui_WinMain(object):
    """Qt form showing the top entries of several tophub.today ranking boards.

    The form consists of a source selector (combo box), a "GO" button and a
    text browser that lists the scraped entries. Typical wiring, in order:
    ``setupUi(widget)``, ``add_url_box()``, ``main_posh()``.
    """

    # Absolute XPath of the ranking table body on a board page. It mirrors
    # the page layout, so it must be updated if the site markup changes.
    _TABLE_XPATH = '//*[@id="page"]/div[2]/div[2]/div[1]/div[2]/div/div[1]/table/tbody'
    # Number of leading table rows to display.
    _MAX_ROWS = 10
    # Seconds to wait for the HTTP request. The request runs on the GUI
    # thread, so without a limit a stalled connection would freeze the window.
    _REQUEST_TIMEOUT = 10

    def __init__(self):
        # Maps the label shown in the combo box to the board URL to scrape.
        self.all_url = {
            '微博热搜...': 'https://tophub.today/n/KqndgxeLl9',
            '抖音热搜...': 'https://tophub.today/n/K7GdaMgdQy',
            '百度实时热点...': 'https://tophub.today/n/Jb0vmloB1G',
            '知乎热榜...': 'https://tophub.today/n/mproPpoq6O',
            '虎嗅热文...': 'https://tophub.today/n/5VaobgvAj1',
            '哔哩哔哩全站排行...': 'https://tophub.today/n/74KvxwokxM',
            '豆瓣新片...': 'https://tophub.today/n/mDOvnyBoEB'
        }

    def spider(self, hot_url):
        """Download one ranking board and list its top rows in the text browser.

        The text browser is cleared first. Each row found is printed to
        stdout and appended to the browser as ``<hot index> | <title>``.
        Rows whose title or hot-index cell is missing are skipped. Network,
        HTTP-status and parse failures are reported in the text browser
        instead of being raised.

        :param hot_url: URL of the board page to scrape.
        """
        # Imported here so the block does not depend on the file-level
        # import list, which only pulls ``get`` from requests.
        from requests import RequestException

        self.textBrowser.clear()
        try:
            response = get(url=hot_url, headers=HEADERS,
                           timeout=self._REQUEST_TIMEOUT)
            response.raise_for_status()
            html = etree.HTML(response.content.decode('utf-8'))
        except (RequestException, UnicodeDecodeError, ValueError,
                etree.LxmlError) as exc:
            self.textBrowser.append('Request failed: %s' % exc)
            return
        if html is None:
            # Guard against a parser result with no document tree.
            self.textBrowser.append('Request failed: empty document')
            return

        for rank in range(1, self._MAX_ROWS + 1):
            row_xpath = f'{self._TABLE_XPATH}/tr[{rank}]'
            try:
                title = html.xpath(f'{row_xpath}/td[2]/a//text()')[0]
                hot_index = html.xpath(f'{row_xpath}/td[3]//text()')[0]
            except IndexError:
                # The row or one of its cells does not exist on this board.
                continue

            print(' %8s | %26s' % (hot_index, title))
            # The trailing newline leaves a blank line between entries.
            self.textBrowser.append(' %8s | %6s\n' % (hot_index, title))

    @staticmethod
    def _build_palette(entries):
        """Return a QPalette with a solid brush set for every entry.

        :param entries: iterable of ``(color_group, color_role, (r, g, b))``.
        """
        palette = QtGui.QPalette()
        for group, role, rgb in entries:
            brush = QtGui.QBrush(QtGui.QColor(*rgb))
            brush.setStyle(QtCore.Qt.SolidPattern)
            palette.setBrush(group, role, brush)
        return palette

    def setupUi(self, WinMain):
        """Create and lay out all child widgets on ``WinMain``.

        Creates ``self.textBrowser``, ``self.comboBox``, ``self.label`` and
        ``self.pushButton`` as children of ``WinMain`` with fixed geometry.

        :param WinMain: the top-level widget that hosts the form.
        """
        WinMain.setObjectName("WinMain")
        WinMain.setWindowModality(QtCore.Qt.WindowModal)
        WinMain.setEnabled(True)
        WinMain.resize(660, 700)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Fixed, QtWidgets.QSizePolicy.Fixed)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(WinMain.sizePolicy().hasHeightForWidth())
        WinMain.setSizePolicy(sizePolicy)
        WinMain.setMaximumSize(QtCore.QSize(660, 700))
        # Window colours: white text areas on an orange background; disabled
        # state uses the background colour for text areas as well.
        WinMain.setPalette(self._build_palette((
            (QtGui.QPalette.Active, QtGui.QPalette.Base, (255, 255, 255)),
            (QtGui.QPalette.Active, QtGui.QPalette.Window, (255, 170, 127)),
            (QtGui.QPalette.Inactive, QtGui.QPalette.Base, (255, 255, 255)),
            (QtGui.QPalette.Inactive, QtGui.QPalette.Window, (255, 170, 127)),
            (QtGui.QPalette.Disabled, QtGui.QPalette.Base, (255, 170, 127)),
            (QtGui.QPalette.Disabled, QtGui.QPalette.Window, (255, 170, 127)),
        )))
        font = QtGui.QFont()
        font.setPointSize(16)
        WinMain.setFont(font)
        WinMain.setLayoutDirection(QtCore.Qt.LeftToRight)

        # Result area listing the scraped entries.
        self.textBrowser = QtWidgets.QTextBrowser(WinMain)
        self.textBrowser.setEnabled(True)
        self.textBrowser.setGeometry(QtCore.QRect(10, 60, 641, 631))
        self.textBrowser.setMaximumSize(QtCore.QSize(660, 666))
        font = QtGui.QFont()
        font.setFamily("华文中宋")
        font.setPointSize(18)
        font.setBold(False)
        font.setWeight(50)
        self.textBrowser.setFont(font)
        self.textBrowser.setLayoutDirection(QtCore.Qt.LeftToRight)
        self.textBrowser.setAutoFillBackground(True)
        self.textBrowser.setFrameShape(QtWidgets.QFrame.NoFrame)
        self.textBrowser.setFrameShadow(QtWidgets.QFrame.Sunken)
        self.textBrowser.setTabStopWidth(200)
        self.textBrowser.setObjectName("textBrowser")

        # Source selector; filled later by add_url_box().
        self.comboBox = QtWidgets.QComboBox(WinMain)
        self.comboBox.setGeometry(QtCore.QRect(120, 10, 240, 40))
        self.comboBox.setMaximumSize(QtCore.QSize(320, 40))
        font = QtGui.QFont()
        font.setFamily("华文中宋")
        font.setPointSize(18)
        self.comboBox.setFont(font)
        self.comboBox.setCursor(QtGui.QCursor(QtCore.Qt.ArrowCursor))
        self.comboBox.setMouseTracking(False)
        self.comboBox.setFocusPolicy(QtCore.Qt.StrongFocus)
        self.comboBox.setContextMenuPolicy(QtCore.Qt.ActionsContextMenu)
        self.comboBox.setLayoutDirection(QtCore.Qt.LeftToRight)
        self.comboBox.setAutoFillBackground(False)
        self.comboBox.setEditable(False)
        self.comboBox.setMaxCount(20)
        self.comboBox.setInsertPolicy(QtWidgets.QComboBox.InsertAlphabetically)
        self.comboBox.setSizeAdjustPolicy(QtWidgets.QComboBox.AdjustToContentsOnFirstShow)
        self.comboBox.setDuplicatesEnabled(False)
        self.comboBox.setFrame(False)
        self.comboBox.setModelColumn(0)
        self.comboBox.setObjectName("comboBox")

        # Caption to the left of the source selector.
        self.label = QtWidgets.QLabel(WinMain)
        self.label.setGeometry(QtCore.QRect(10, 10, 101, 41))
        font = QtGui.QFont()
        font.setFamily("楷体")
        font.setPointSize(20)
        font.setBold(True)
        font.setWeight(75)
        font.setStyleStrategy(QtGui.QFont.PreferAntialias)
        self.label.setFont(font)
        self.label.setObjectName("label")

        # "GO" button; its click handler is attached in main_posh().
        self.pushButton = QtWidgets.QPushButton(WinMain)
        self.pushButton.setGeometry(QtCore.QRect(490, 10, 110, 40))
        self.pushButton.setMinimumSize(QtCore.QSize(110, 0))
        self.pushButton.setMaximumSize(QtCore.QSize(110, 40))
        self.pushButton.setPalette(self._build_palette((
            (QtGui.QPalette.Active, QtGui.QPalette.Button, (255, 170, 0)),
            (QtGui.QPalette.Active, QtGui.QPalette.ButtonText, (85, 170, 0)),
            (QtGui.QPalette.Inactive, QtGui.QPalette.Button, (255, 170, 0)),
            (QtGui.QPalette.Inactive, QtGui.QPalette.ButtonText, (85, 170, 0)),
            (QtGui.QPalette.Disabled, QtGui.QPalette.Button, (255, 170, 0)),
            (QtGui.QPalette.Disabled, QtGui.QPalette.ButtonText, (120, 120, 120)),
        )))
        font = QtGui.QFont()
        font.setFamily("Segoe Script")
        font.setPointSize(24)
        font.setItalic(False)
        font.setUnderline(False)
        self.pushButton.setFont(font)
        self.pushButton.setCursor(QtGui.QCursor(QtCore.Qt.OpenHandCursor))
        self.pushButton.setFocusPolicy(QtCore.Qt.WheelFocus)
        self.pushButton.setAutoRepeat(False)
        self.pushButton.setAutoExclusive(False)
        self.pushButton.setAutoDefault(False)
        self.pushButton.setFlat(True)
        self.pushButton.setObjectName("pushButton")

        self.retranslateUi(WinMain)
        self.comboBox.setCurrentIndex(-1)
        QtCore.QMetaObject.connectSlotsByName(WinMain)

    def retranslateUi(self, WinMain):
        """Set the window title and the translatable text of every widget.

        :param WinMain: the top-level widget whose title is set.
        """
        _translate = QtCore.QCoreApplication.translate
        WinMain.setWindowTitle(_translate("WinMain", "Today Hot"))
        # Initial (empty) rich-text document for the result area.
        self.textBrowser.setHtml(_translate("WinMain",
                                            "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\" \"http://www.w3.org/TR/REC-html40/strict.dtd\">\n"
                                            "<html><head><meta name=\"qrichtext\" content=\"1\" /><style type=\"text/css\">\n"
                                            "p, li { white-space: pre-wrap; }\n"
                                            "</style></head><body style=\" font-family:'华文新魏'; font-size:16pt; font-weight:400; font-style:normal;\">\n"
                                            "<p style=\"-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px; font-family:'SimSun';\"><br /></p></body></html>"))
        self.label.setText(_translate("WinMain", "获取源:"))
        self.pushButton.setText(_translate("WinMain", "→ GO"))

    def main_posh(self):
        """Connect the "GO" button so a click fetches the selected source."""
        self.pushButton.clicked.connect(self.post_url)

    def add_url_box(self):
        """Add every source label from ``self.all_url`` to the combo box."""
        for hot_url_ in self.all_url:
            self.comboBox.addItem(hot_url_)

    def post_url(self):
        """Fetch and display the board currently selected in the combo box.

        Does nothing when the current combo-box text is not a known source
        label (for example when nothing is selected).
        """
        hot_title = self.comboBox.currentText()
        title_url = self.all_url.get(hot_title)
        if title_url is None:
            return

        self.spider(hot_url=title_url)
 
 
def open_html():
    """Create the Today Hot window, show it, and run the Qt event loop.

    Before the window is shown, the source selector is filled, the button
    handler is connected, and the currently selected source is fetched once.
    The process exits with the event loop's return code.
    """
    application = QtWidgets.QApplication(sys.argv)
    window = QtWidgets.QWidget()

    form = Ui_WinMain()
    form.setupUi(window)

    # Order matters: the selector must hold items before the first fetch.
    form.add_url_box()
    form.main_posh()
    form.post_url()

    window.show()
    exit_code = application.exec_()
    sys.exit(exit_code)
 
 
# Launch the GUI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    open_html()


标签 Python爬取
相关推荐
  • Python爬取
  • Python爬取豆瓣电影top250排行榜

    Python爬取豆瓣电影top250排行榜示例代码,用的parsel和re两个模块,代码如下:import requestsimport csvimport reimport parselwith open("豆瓣top250.csv",mode="w",encoding="utf_8_sig",newline='') as f: csv_writer = csv.writer(f) ...

    python教程 50 2年前
  • 百度图库python批量爬取下载代码

    # @风清扬(fqy2022)import requestsimport timeimport os# 创建保存文件夹if os.path.isdir(r'./保存'): print('已存在文件夹!')else: os.mkdir('./保存') print('已为您创建文件夹!') class Image(object)...

    python教程 91 2年前
  • Python平台热搜热文爬取代码

    前言分享一段Python爬取各大平台热搜热文信息,支持微博热搜、抖音热搜、百度实时热点、知乎热榜、虎嗅热文、哔哩哔哩全站排行、豆瓣新片,免去一个一个网站的看了,是站长编辑的福音。提示:此代码为Python代码,需要有一点基础才能运行,如果没有基础,我的站长站...

    python教程 61 3年前
  • 获取免费的https代理Python代码

    前言大家用Python爬网页时候,爬快了被封IP,爬慢了,等的着急,这时候就需要https代理来切换IP了。分享一段获取免费的https代理Python代码,可以快速获取网络上免费的https代理。Python代码from multiprocessing.dummy import Lockimport reimport requestsi...

    python教程 101 3年前
  • Python爬取知乎内容脚本

    题主的数据科学导论作业,关于舆情分析负责信息爬取。可能会对大家有点帮助,如果有哪写的不太好的地方,希望可以告诉我如果不想看,直接用的话把js代码命名为 g_encrypt.js 和python代码放在同一级目录就可以了(要搭建nodejs环境,具体可以参考Nodejs安装及环...

    python教程 135 3年前