python汇总
(75)

python

typecho阅读量爬取

__author__ = 'jxl'
import requests
# 用于解决爬取的数据格式化
import io
import sys
import re
from datetime import datetime


class ShowProcess():
    """Text progress bar drawn on a single terminal line.

    Each call to show_process() redraws the bar in place, for example
    ``[>>>>>>>>>>----------]50.00%``.  Once the step counter reaches
    ``max_steps`` the bar is closed: the completion message is printed and
    the counter is reset to zero.
    """
    i = 0  # current step
    max_steps = 0  # total number of steps to process
    max_arrow = 50  # width of the bar in characters
    infoDone = 'done'  # message printed when the bar completes

    def __init__(self, max_steps, infoDone = 'Done'):
        """Create a bar for ``max_steps`` steps.

        Args:
            max_steps: total number of steps that make up 100%.
            infoDone: message printed by close() when the bar completes.
        """
        self.max_steps = max_steps
        self.i = 0
        self.infoDone = infoDone

    def show_process(self, i=None):
        """Advance the bar and redraw it on stdout.

        Args:
            i: explicit step number to display; when omitted the internal
                counter is advanced by one.
        """
        if i is not None:
            self.i = i
        else:
            self.i += 1
        if self.max_steps > 0:
            # Clamp for display only, so the bar never grows past max_arrow
            # characters and never reports more than 100%.
            shown = min(max(self.i, 0), self.max_steps)
            num_arrow = int(shown * self.max_arrow / self.max_steps)
            percent = shown * 100.0 / self.max_steps
        else:
            # Nothing to process: draw a full bar instead of dividing by zero.
            num_arrow = self.max_arrow
            percent = 100.0
        num_line = self.max_arrow - num_arrow
        # The trailing '\r' moves the cursor back to the start of the line so
        # the next call overwrites this bar.
        process_bar = '[' + '>' * num_arrow + '-' * num_line + ']'\
                      + '%.2f' % percent + '%' + '\r'
        sys.stdout.write(process_bar)
        sys.stdout.flush()
        if self.i >= self.max_steps:
            self.close()

    def close(self):
        """Finish the bar: end the line, print the completion message, reset the counter."""
        print('')
        print(self.infoDone)
        self.i = 0



if __name__=='__main__':
    # Count the articles of a typecho blog and sum their view counters, then
    # push the totals to a WeCom group robot.
    cishu=500  # article ids 1 .. cishu-1 are probed
    url='http://blog.52i.xyz'
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36'}
    # View counter as it appears in the article page text.
    read_count_re = re.compile(r"([\d.]+) 次阅读")
    s=0    # total number of views
    shu=0  # number of view counters found
    for num in range(1, cishu):
        url1=url+'/index.php/archives/'+str(num)
        try:
            # Send the browser user-agent and bound the wait so one dead page
            # cannot stall or abort the whole run.
            r = requests.get(url1, headers=headers, timeout=10)
        except requests.RequestException as exc:
            print('请求失败: '+url1+' '+str(exc))
            continue
        # Encoding is reset to None as in the original script (presumably so
        # requests auto-detects it from the body - confirm).
        r.encoding=None
        for salary in read_count_re.findall(r.text):
            try:
                views = int(salary)
            except ValueError:
                # The pattern also admits dots (e.g. "1.5"); such matches are
                # not integer counters, so skip them instead of crashing.
                continue
            s += views
            print('文章阅读量:'+salary)
            shu += 1
    neirong='博客: '+url+'的文章数为:'+str(shu)+'\n  总阅读量为:'+str(s)
    print(neirong)
    # NOTE(review): the webhook key is committed in source; anyone who can read
    # this file can post to the group. Consider loading it from configuration.
    urll = 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=7b46ff77-d915-4231-a813-6c48dc48b607'  # robot webhook address
    webhook_headers = {'Content-type':'application/json'}
    data = {
        "msgtype": "text",
        "text": {
            "content": neirong,  # text to send
            "mentioned_list": ["@all"],  # who to @-mention; may also be specific users
        },
    }
    r = requests.post(urll,headers=webhook_headers,json=data,timeout=10)
    print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"),"文本发送情况提示",r.text)

爬取typecho文章名

__author__ = 'jxl'

import requests
import re
# Print the <title> of every existing article page of a typecho blog.
cishu=500  # article ids 1 .. cishu-1 are probed
url='http://blog.52i.xyz'
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36'}
title_re = re.compile('<title>(.*)</title')
for num in range(1, cishu):
    url1=url+'/index.php/archives/'+str(num)
    try:
        # Send the browser user-agent and bound the wait per page.
        r = requests.get(url1, headers=headers, timeout=10)
    except requests.RequestException:
        continue
    # Only pages that exist are of interest; skip them before parsing.
    if r.status_code != 200:
        continue
    # Encoding is reset to None as in the original script (presumably so
    # requests auto-detects it from the body - confirm).
    r.encoding=None
    salaryt = title_re.search(r.text)
    if salaryt is not None:
        # Print the captured title text rather than the Match object.
        print(salaryt.group(1))

五子棋

__author__ = 'jxl'
from tkinter import *
import math

# Board widget: owns the Tk window and the canvas the grid is drawn on
class chessBoard() :
  """Fixed-size Tk window holding a canvas with a 15x15 Gobang grid."""

  def __init__(self) :
    """Create the window and canvas, draw the grid and place the canvas."""
    root = Tk()
    root.title("五子棋游戏")
    root.geometry("660x470")
    root.resizable(0,0)
    self.window = root
    self.canvas = Canvas(root , bg="#EEE8AC" , width=470, height=470)
    self.paint_board()
    self.canvas.grid(row = 0 , column = 0)

  def paint_board(self) :
    """Draw the 15 horizontal and 15 vertical lines plus the five marker dots."""
    near = 25            # pixel coordinate of the first line
    step = 30            # distance between neighbouring lines
    far = near + 14*step # pixel coordinate of the last line
    # Horizontal lines; the two border lines are drawn thicker.
    for row in range(15) :
      y = near + row*step
      self.canvas.create_line(near , y , far , y , width = 2 if row in (0, 14) else 1)
    # Vertical lines, same border rule.
    for column in range(15) :
      x = near + column*step
      self.canvas.create_line(x , near , x , far , width = 2 if column in (0, 14) else 1)
    # Marker dots: four corner points and the board centre, radius 3.
    for cx, cy in ((115, 115), (355, 115), (115, 355), (235, 235), (355, 355)) :
      self.canvas.create_oval(cx-3, cy-3, cx+3, cy+3, fill="black")

# Gobang (five-in-a-row) game: rules, state and button wiring
# Cell values: 0 = black stone, 1 = white stone, 2 = empty
class Gobang() :
  """Two-player Gobang played by clicking on a chessBoard canvas.

  State:
    db          16x16 grid of cell values (0 black, 1 white, 2 empty); only
                rows/columns 0..14 are playable.
    order       moves in play order, each encoded as x + 15*y (used to undo).
    color_count value of the side to move (0 or 1); color is its fill name.
    flag_win    1 while no game is running (before start or after a win).
    flag_empty  1 when the board has been cleared, 0 after a win until the
                board is cleared again.

  Note: the constructor calls options(), which enters the Tk main loop, so
  Gobang() only returns once the window has been closed.
  """

  # (dy, dx) steps of the four lines through a cell:
  # row, column, "\" diagonal and "/" diagonal.
  _DIRECTIONS = ((0, 1), (1, 0), (1, 1), (1, -1))

  def __init__(self) :
    """Build the board, reset the game state and start the UI loop."""
    self.board = chessBoard()
    self.game_print = StringVar()
    self.game_print.set("")
    # 16x16 grid: one spare row/column beyond the 15x15 playable area
    self.db = [([2] * 16) for i in range(16)]
    # Moves in play order, used for undo
    self.order = []
    # Side to move
    self.color_count = 0
    self.color = 'black'
    # No game is running yet and the board is empty
    self.flag_win = 1
    self.flag_empty = 1
    self.options()

  def change_color(self) :
    """Hand the move to the other side (black <-> white)."""
    self.color_count = (self.color_count + 1 ) % 2
    self.color = "black" if self.color_count == 0 else "white"

  def _draw_stone(self , y , x , color) :
    """Draw one stone of fill colour `color` on intersection (y, x)."""
    cx , cy = 25+30*x , 25+30*y
    self.board.canvas.create_oval(cx-12 , cy-12 , cx+12 , cy+12 , fill = color,tags = "chessman")

  def chess_moving(self ,event) :
    """Canvas click handler: place a stone for the side to move.

    Ignored unless a game is running. The click is snapped to the nearest
    intersection; clicks outside the grid or on an occupied intersection do
    nothing.
    """
    # No moves until "start" has been pressed, or after a win until "clear"
    if self.flag_win ==1 or self.flag_empty ==0 :
      return
    # Pixel coordinates -> grid indices
    x = round((event.x-25)/30)
    y = round((event.y-25)/30)
    # Check the bounds BEFORE indexing so the lookup never relies on negative
    # indices wrapping around into the padding row/column.
    if not self.limit_boarder(y,x) or self.db[y][x] != 2 :
      return
    self.db[y][x] = self.color_count
    self.order.append(x+15*y)
    self._draw_stone(y , x , self.color)
    if self.game_win(y,x,self.color_count) :
      print(self.color,"获胜")
      self.game_print.set(self.color+"获胜")
    else :
      self.change_color()
      self.game_print.set("请"+self.color+"落子")

  def limit_boarder(self , y , x) :
    """Return True when (y, x) is a playable intersection (both in 0..14)."""
    return 0 <= x <= 14 and 0 <= y <= 14

  def chessman_count(self , y , x , color_count ) :
    """Return the longest run of `color_count` stones through (y, x).

    The cell (y, x) itself always counts as one; at most four further cells
    are examined on each side along each of the four lines.
    """
    best = 0
    for dy , dx in self._DIRECTIONS :
      run = 1
      for sign in (-1 , 1) :
        for i in range(1 , 5) :
          cy , cx = y + sign*i*dy , x + sign*i*dx
          # Stop at the board edge or at the first cell of another value
          if not self.limit_boarder(cy , cx) or self.db[cy][cx] != color_count :
            break
          run += 1
      best = max(best , run)
    return best

  def game_win(self , y , x , color_count ) :
    """Return True and end the game if the stone at (y, x) makes five in a row."""
    if self.chessman_count(y,x,color_count) < 5 :
      return False
    self.flag_win = 1
    self.flag_empty = 0
    return True

  def withdraw(self ) :
    """Undo the last move: wipe all stones and redraw the remaining ones."""
    if not self.order or self.flag_win == 1:
      return
    self.board.canvas.delete("chessman")
    y , x = divmod(self.order.pop() , 15)
    self.db[y][x] = 2
    # Start from white so that the first change_color() below yields black,
    # which is the colour of the first move.
    self.color_count = 1
    for i in self.order :
      iy , ix = divmod(i , 15)
      self.change_color()
      self._draw_stone(iy , ix , self.color)
    # One more switch gives the side whose turn it now is
    self.change_color()
    self.game_print.set("请"+self.color+"落子")

  def empty_all(self) :
    """Clear the board and return to the initial, not-yet-started state."""
    self.board.canvas.delete("chessman")
    self.db = [([2] * 16) for i in range(16)]
    self.order = []
    self.color_count = 0
    self.color = 'black'
    self.flag_win = 1
    self.flag_empty = 1
    self.game_print.set("")

  def game_start(self) :
    """Allow stones to be placed; refused until the board has been cleared."""
    if self.flag_empty == 0:
      return
    self.flag_win = 0
    self.game_print.set("请"+self.color+"落子")

  def options(self) :
    """Bind the click handler, create the label and buttons, run the main loop."""
    self.board.canvas.bind("<Button-1>",self.chess_moving)
    Label(self.board.window , textvariable = self.game_print , font = ("Arial", 20) ).place(relx = 0, rely = 0 ,x = 495 , y = 200)
    Button(self.board.window , text= "开始游戏" ,command = self.game_start,width = 13, font = ("Verdana", 12)).place(relx=0, rely=0, x=495, y=15)
    Button(self.board.window , text= "我要悔棋" ,command = self.withdraw,width = 13, font = ("Verdana", 12)).place(relx=0, rely=0, x=495, y=60)
    Button(self.board.window , text= "清空棋局" ,command = self.empty_all,width = 13, font = ("Verdana", 12)).place(relx=0, rely=0, x=495, y=105)
    Button(self.board.window , text= "结束游戏" ,command = self.board.window.destroy,width = 13, font = ("Verdana", 12)).place(relx=0, rely=0, x=495, y=420)
    self.board.window.mainloop()

# Script entry point. Gobang.__init__ ends by calling options(), which runs the
# Tk main loop, so this statement only completes once the window is closed.
if __name__ == "__main__":
  game = Gobang()

存活主机扫描

__author__ = 'jxl'
import subprocess as p
import time
import threading
from queue import Queue
def check_ip(ip):
  """Ping `ip` twice and report whether it answered.

  Prints "<ip> is Up" when the ping output contains a TTL field.

  Args:
    ip: host address passed to the system `ping` command.

  Returns:
    True when a reply carrying a TTL field was seen, otherwise False
    (including when ping cannot be started or does not finish in time).
  """
  import platform  # local import: only this helper needs it

  # Windows ping takes the packet count with -n, other systems with -c.
  count_flag = '-n' if platform.system().lower() == 'windows' else '-c'
  try:
    # Argument list without a shell: `ip` is never interpreted as shell text.
    # run() waits for the process and drains both pipes.
    w = p.run(['ping', count_flag, '2', ip], stdout=p.PIPE, stderr=p.PIPE,
              encoding='gbk', errors='replace', timeout=30)
  except (OSError, p.SubprocessError):
    return False
  # Windows prints "TTL=", other systems "ttl="; compare case-insensitively.
  if 'ttl' in w.stdout.lower():
    print(ip,'is Up')
    return True
  return False
def main(prefix='39.156.69.', first=1, last=254, stagger=0.2):
  """Ping every host `prefix`+`first` .. `prefix`+`last`, one thread per host.

  Args:
    prefix: leading part of the address, including the trailing dot.
    first: first host number to probe (inclusive).
    last: last host number to probe (inclusive).
    stagger: seconds to wait between starting two threads.
  """
  threads=[]
  # One thread per address. Taking addresses from a pre-filled queue one time
  # more than it had entries blocked forever on the final get().
  for host in range(first, last + 1):
    t=threading.Thread(target=check_ip,args=(prefix+str(host),))
    t.start()
    threads.append(t)
    time.sleep(stagger)
  for t in threads:
    t.join()
  print('all done')
# Run the scan only when this file is executed as a script, not when imported.
if __name__ == '__main__':
  main()

微信机器人

__author__ = 'jxl'
import requests
from datetime import datetime



# Send one text message to a WeCom group through its robot webhook.
# NOTE(review): the webhook key is committed in source; anyone who can read
# this file can post to the group. Consider loading it from configuration.
url = 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=7b46ff77-d915-4231-a813-6c48dc48b607'  # robot webhook address
headers = {'Content-type':'application/json'}
data = {
  "msgtype": "text",
  "text": {
    "content": ".......1234......",  # text to send
    "mentioned_list": ["@all"],  # who to @-mention; may also be specific users
    }
  }
# An explicit timeout keeps the script from waiting forever on a stalled
# connection.
r = requests.post(url,headers=headers,json=data,timeout=10)
print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"),"文本发送情况提示",r.text)

B站弹幕 词云

__author__ = 'jxl'
import requests
import parsel
import csv
import time
import jieba
import wordcloud
import imageio

# Download the danmaku history of one video for 2020-11-20 .. 2020-11-30,
# store every line in a CSV file, then render a word cloud from that file.
# NOTE(review): the cookie below carries a logged-in session (SESSDATA,
# bili_jct); it should not live in source control.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
    "cookie": "__uuid=1896D3F7-4A98-54EB-F7FA-3301CE9EF5F307776infoc; buvid3=B68B2187-4C3E-4466-A896-FBF9B292099B190963infoc; LIVE_BUVID=AUTO4115757254257055; stardustvideo=1; rpdid=|(umu|ulY)JJ0J'ul~l~klRJ); sid=8cq4r229; im_notify_type_65901796=0; laboratory=1-1; DedeUserID=523606542; DedeUserID__ckMd5=909861ec223d26d8; blackside_state=1; CURRENT_FNVAL=80; SESSDATA=a976c0b4%2C1618637313%2C4d792*a1; bili_jct=7f54729ec20660f750661122b80746d2; PVID=1; bp_video_offset_523606542=458111639975213216; CURRENT_QUALITY=16; bfe_id=1e33d9ad1cb29251013800c68af42315"
}
# Open the CSV once for the whole download instead of once per danmaku line.
with open('B站弹幕.csv', mode='a', newline='', encoding='utf-8-sig') as f:
    writer = csv.writer(f)
    # November has 30 days, so the last valid date is 2020-11-30.
    for page in range(20,31):
        time.sleep(1)
        print('=================正在下载11月{}日弹幕===================================='.format(page))
        url = 'https://api.bilibili.com/x/v2/dm/history?type=1&oid=140610898&date=2020-11-{}'.format(page)
        response = requests.get(url=url, headers=headers, timeout=10)
        response.encoding = response.apparent_encoding
        selector = parsel.Selector(response.text)
        data = selector.css('d::text').getall()
        for i in data:
            print(i)
            writer.writerow([i])
# Read back with the codec used for writing so the BOM is stripped, and close
# the handle when done.
with open('B站弹幕.csv', encoding='utf-8-sig') as f:
    txt = f.read()
# Split the text into words with jieba
txt_list = jieba.lcut(txt)
string = ' '.join(txt_list)
# Word-cloud settings
wc = wordcloud.WordCloud(
        width=800,         # image width
        height=500,         # image height
        background_color='white',   # image background colour
        font_path='msyh.ttc',    # font used for the words
        # mask=py,     # optional mask image
        scale=15,
)
# Feed the text to the word cloud
wc.generate(string)
# Save the rendered image
wc.to_file('1.png')

爬取微信公众号文章

# !/usr/bin/python
# -*- coding: UTF-8 -*-
import os

import requests
import xlsxwriter
from lxml import etree

# Scrape article titles and cover images from the Sogou WeChat index pages,
# then export the collected list to an Excel sheet and a text file.

# Headers for the article index requests
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Host': 'weixin.sogou.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
}
# Headers for the image downloads
headers_images = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Host': 'img01.sogoucdn.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
}
a = 0          # running article counter, also used as the image file number
articles = []  # one {'title': ..., 'id': ...} dict per article

# Output folders; creating the image folder creates its parent as well
save_path = './微信文章'
images_path = '%s/图片' % save_path
os.makedirs(images_path, exist_ok=True)
for i in range(1, 9):
    for j in range(1, 5):
        url = "https://weixin.sogou.com/pcindex/pc/pc_%d/%d.html" % (i, j)
        # Fetch the index page and decode it as UTF-8
        page = requests.get(url=url, headers=headers, timeout=10)
        page.encoding = 'utf-8'
        html = etree.HTML(page.text)
        if html is None:
            continue
        for content in html.xpath('/html/body/li'):
            titles = content.xpath('./div[@class="txt-box"]/h3//text()')
            srcs = content.xpath('./div[@class="img-box"]//img/@src')
            # Skip list items without a title or a cover image instead of
            # aborting the whole run.
            if not titles or not srcs:
                continue
            a = a + 1
            articles.append({'title': titles[0], 'id': '%d.jpg' % a})
            # Image URL
            path = 'http:' + srcs[0]
            # Download the cover image; a failed download is reported and the
            # scrape continues.
            try:
                images = requests.get(url=path, headers=headers_images, timeout=10).content
                with open('%s/%d.jpg' % (images_path, a), "wb") as f:
                    print('正在下载第%d篇文章图片' % a)
                    f.write(images)
            except (requests.RequestException, OSError) as e:
                print('下载文章图片失败%s' % e)
# Store the collected list in an Excel workbook
workbook = xlsxwriter.Workbook('%s/Excel格式.xlsx' % save_path)
worksheet = workbook.add_worksheet()
print('正在生成Excel...')
try:
    # Row 0 holds the column headers
    worksheet.write(0, 0, 'title')
    worksheet.write(0, 1, 'id')
    for row, article in enumerate(articles, start=1):
        worksheet.write(row, 0, article['title'])
        worksheet.write(row, 1, article['id'])
    workbook.close()
except Exception as e:
    print('生成Excel失败%s' % e)
else:
    # Report success only when nothing went wrong
    print("生成Excel成功")
print('正在生成txt...')
try:
    # Explicit encoding: the titles are Chinese text and the platform default
    # codec may not be able to represent them.
    with open('%s/数组格式.txt' % save_path, "w", encoding='utf-8') as f:
        f.write(str(articles))
except OSError as e:
    print('生成txt失败%s' % e)
else:
    print('生成txt成功')
print('共爬取%d篇文章' % a)

哈夫曼编码器

# Huffman tree node
class TreeNode(object):
  """One node of the Huffman tree, built from a (value, priority) pair."""

  def __init__(self, data):
    """Store data[0] as the value and data[1] as the priority; no children, empty code."""
    symbol, weight = data[0], data[1]
    self.val = symbol
    self.priority = weight
    self.leftChild = self.rightChild = None
    self.code = ""
# Build the initial list of tree nodes
def creatnodeQ(codes):
  """Return a list holding one TreeNode per item of `codes`, in the same order."""
  return [TreeNode(pair) for pair in codes]
# Insert a node into the queue, keeping priorities in ascending order
def addQ(queue, nodeNew):
  """Return a new list with `nodeNew` inserted into `queue`.

  The node is placed in front of the first element whose priority is greater
  than or equal to its own, or at the end when there is none. `queue` itself
  is not modified.
  """
  position = len(queue)
  for index, node in enumerate(queue):
    if node.priority >= nodeNew.priority:
      position = index
      break
  return queue[:position] + [nodeNew] + queue[position:]
# Queue of tree nodes with an explicit size counter
class nodeQeuen(object):
  """Node queue used while building the tree.

  `que` is the list of nodes and `size` the number of nodes tracked by the
  add/pop calls.
  """

  def __init__(self, code):
    """Create the queue from `code` via creatnodeQ."""
    nodes = creatnodeQ(code)
    self.que = nodes
    self.size = len(nodes)

  def addNode(self,node):
    """Insert `node` via addQ and count it."""
    updated = addQ(self.que, node)
    self.que = updated
    self.size += 1

  def popNode(self):
    """Remove and return the first node of the queue."""
    self.size -= 1
    head = self.que.pop(0)
    return head
# Count how often each character occurs; the count is used as the priority
def freChar(string):
  """Return (character, count) pairs of `string`, sorted by ascending count."""
  counts = {}
  for ch in string:
    counts[ch] = counts.get(ch, 0) + 1
  return sorted(counts.items(), key=lambda pair: pair[1])
# Build the Huffman tree
def creatHuffmanTree(nodeQ):
  """Merge the two front nodes of `nodeQ` until one node is left; return it.

  Each merge creates a value-less parent whose priority is the sum of its two
  children and puts it back into the queue.
  """
  while not nodeQ.size == 1:
    first = nodeQ.popNode()
    second = nodeQ.popNode()
    parent = TreeNode([None, first.priority+second.priority])
    parent.leftChild, parent.rightChild = first, second
    nodeQ.addNode(parent)
  return nodeQ.popNode()

# Lookup tables filled by HuffmanCodeDic:
# codeDic1 maps symbol -> code, codeDic2 maps code -> symbol
codeDic1 = {}
codeDic2 = {}
# Derive the code table from the Huffman tree
def HuffmanCodeDic(head, x):
  """Walk the tree below `head` and record the code of every symbol.

  Going to a left child appends '0' to the prefix, going to a right child
  appends '1'. Each visited node gets its prefix stored in `code`; nodes that
  carry a symbol are entered into codeDic1 and codeDic2.

  Args:
    head: current tree node, or None.
    x: code prefix accumulated on the way to `head`; '' for the root.
  """
  if head is None:
    return
  # A tree with a single symbol consists of one leaf reached with the empty
  # prefix. Give it a one-bit code, otherwise every text would encode to ''.
  if x == '' and head.leftChild is None and head.rightChild is None:
    x = '0'
  HuffmanCodeDic(head.leftChild, x+'0')
  # Assign rather than append, so walking the same tree twice does not
  # lengthen the stored codes.
  head.code = x
  # Nodes created while merging carry None as value; only real symbols get a
  # table entry (compared against None so falsy symbols are kept).
  if head.val is not None:
    codeDic2[head.code] = head.val
    codeDic1[head.val] = head.code
  HuffmanCodeDic(head.rightChild, x+'1')
# Encode a string
def TransEncode(string):
  """Return the codes of all characters of `string`, looked up in codeDic1 and concatenated."""
  return "".join([codeDic1[ch] for ch in string])
# Decode a code string
def TransDecode(StringCode):
  """Turn a code string back into text using codeDic2.

  Characters are collected until the collected piece is a key of codeDic2;
  its symbol is then emitted and collecting starts over. A trailing piece
  that matches no key is dropped.
  """
  symbols = []
  pending = ""
  for bit in StringCode:
    pending += bit
    if pending not in codeDic2:
      continue
    symbols.append(codeDic2[pending])
    pending = ""
  return "".join(symbols)
# Example run: build the code table for a sample string, encode it, decode it
# again and check that the round trip reproduces the input.
string = "saasdasEFKfjsdf(msd3lsdlfj"
t = nodeQeuen(freChar(string))
tree = creatHuffmanTree(t)
HuffmanCodeDic(tree, '')
print('树结构')
print(codeDic1)                # symbol -> code table filled by HuffmanCodeDic
a = TransEncode(string)                 # encode (labelled "加密" in the output)
print('加密:'+a)
aa = TransDecode(a)                      # decode (labelled "解密" in the output)
print('解密'+aa)
print(string == aa)

网易云热评

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import re
import urllib.request
import urllib.error
import urllib.parse
import json



def get_all_hotSong():
    """Fetch the NetEase Cloud Music hot-song chart.

    Returns:
        (hot_song_name, hot_song_id): two lists of equal length holding the
        title and the id string of every song on the chart, in chart order.

    Raises:
        IndexError: when the hidden song list is not found in the page.
    """
    url='http://music.163.com/discover/toplist?id=3778678'    # hot-song chart page
    header={    # request headers
        'User-Agent':'Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
    }
    request=urllib.request.Request(url=url, headers=header)
    html=urllib.request.urlopen(request).read().decode('utf8')
    # First pass: isolate the hidden <ul> that lists every song. '?' must be
    # escaped and digits are matched with \d; without the backslashes the
    # pattern can never match a real "/song?id=123" link.
    pat1=r'<ul class="f-hide"><li><a href="/song\?id=\d*?">.*</a></li></ul>'
    result=re.findall(pat1, html)
    if not result:
        raise IndexError('hot-song list not found in the chart page')
    result=result[0]

    # Second pass: take id and title from the same match so that the two
    # lists always stay aligned.
    pat2=r'<li><a href="/song\?id=(\d*?)">(.*?)</a></li>'
    pairs=re.findall(pat2, result)
    hot_song_id=[song_id for song_id, _ in pairs]
    hot_song_name=[title for _, title in pairs]

    return hot_song_name,hot_song_id

def get_hotComments(hot_song_name,hot_song_id):
    """Fetch the hot comments of one song and append them to ./song_comments.

    The file receives the song title, one numbered line per comment and a
    separator line.

    Args:
        hot_song_name: song title, written as the heading of the block.
        hot_song_id: song id string, inserted into the comment API URL.
    """
    url='http://music.163.com/weapi/v1/resource/comments/R_SO_4_' + hot_song_id + '?csrf_token='   # comment API URL
    header={    # request headers
        'User-Agent':'Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
    }
    # Form data sent with the POST request
    data={'params':'zC7fzWBKxxsm6TZ3PiRjd056g9iGHtbtc8vjTpBXshKIboaPnUyAXKze+KNi9QiEz/IieyRnZfNztp7yvTFyBXOlVQP/JdYNZw2+GRQDg7grOR2ZjroqoOU2z0TNhy+qDHKSV8ZXOnxUF93w3DA51ADDQHB0IngL+v6N8KthdVZeZBe0d3EsUFS8ZJltNRUJ','encSecKey':'4801507e42c326dfc6b50539395a4fe417594f7cf122cf3d061d1447372ba3aa804541a8ae3b3811c081eb0f2b71827850af59af411a10a1795f7a16a5189d163bc9f67b3d1907f5e6fac652f7ef66e5a1f12d6949be851fcf4f39a0c2379580a040dc53b306d5c807bf313cc0e8f39bf7d35de691c497cda1d436b808549acc'}
    postdata=urllib.parse.urlencode(data).encode('utf8')
    request=urllib.request.Request(url,headers=header,data=postdata)
    reponse=urllib.request.urlopen(request).read().decode('utf8')
    json_dict=json.loads(reponse)
    # A response without hot comments yields an empty block instead of a
    # KeyError.
    hot_commit=json_dict.get('hotComments', [])

    # Lines end with a real newline ('\n'); a bare 'n' glued everything
    # together on one line.
    lines=[hot_song_name+':'+'\n']
    for num, item in enumerate(hot_commit, start=1):
        lines.append(str(num)+'.'+item['content']+'\n')
    lines.append('\n==============================================\n\n')
    # Explicit encoding for the comment text; the with-block closes the file
    # even when writing fails.
    with open('./song_comments','a',encoding='utf8') as fhandle:
        fhandle.writelines(lines)




# Fetch the chart once, then store the hot comments of every song on it
hot_song_name,hot_song_id=get_all_hotSong()

for num in range(len(hot_song_name)):
    position=num+1    # 1-based number shown in the progress messages
    print('正在抓取第%d首歌曲热评...'%position)
    get_hotComments(hot_song_name[num],hot_song_id[num])
    print('第%d首歌曲热评抓取成功'%position)

插画素材

import ssl
import os
import urllib.request
import requests

from bs4 import BeautifulSoup
# Download every image listed on one gallery page into ./插画素材/.
# NOTE(review): the next line disables TLS certificate verification for all
# urllib HTTPS requests made by this process.
ssl._create_default_https_context = ssl._create_unverified_context
headers = {
    'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36',
}
os.makedirs('./插画素材/', exist_ok=True)
url = 'https://www.tupianzj.com/meinv/mm/meizitu/'
html = requests.get(url, headers=headers, timeout=10).text
soup = BeautifulSoup(html, 'lxml')
gallery = soup.find('ul', class_='d1 ico3')
# A page without the expected list yields no downloads instead of an
# AttributeError.
images_data = gallery.find_all_next('li') if gallery is not None else []
for image in images_data:
    for img in image.find_all('img'):
        src = img.get('src')
        alt = img.get('alt')
        if not src or not alt:
            continue
        print(src, alt)
        # Download inside the loop so every image is fetched, not only the
        # last one; a failed download is reported and the rest continue.
        try:
            urllib.request.urlretrieve(src, './插画素材/' + alt + '.jpg')
        except (OSError, ValueError) as exc:
            print('下载失败', src, exc)

心跳检测机器人

__author__ = 'jxl'

from datetime import datetime
import requests
import sys, time

# Heartbeat monitor: probe the site every 10 minutes and alert a WeCom group
# robot whenever it does not answer with HTTP 200.
url='http://106.13.164.199'  # site under test
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36'}
# NOTE(review): the webhook key is committed in source; consider loading it
# from configuration.
url1 = 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=7b46ff77-d915-4231-a813-6c48dc48b607'  # robot webhook address
while(1):
    try:
        r = requests.get(url, headers=headers, timeout=10)
        site_ok = r.status_code == 200
    except requests.RequestException:
        # An unreachable server raises instead of returning a status code;
        # that is a failure to alert on, not a reason to stop monitoring.
        site_ok = False
    if not site_ok:
        print('网页错误')
        webhook_headers = {'Content-type':'application/json'}
        data = {
            "msgtype": "text",
            "text": {
                "content": ".......WARING......",  # text to send
                "mentioned_list": ["@all"],  # who to @-mention; may also be specific users
            }
        }
        try:
            r = requests.post(url1,headers=webhook_headers,json=data,timeout=10)
            print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"),url+'服务器错误',r.text)
        except requests.RequestException as exc:
            # Keep the monitor alive even if the alert itself cannot be sent.
            print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"),url+'服务器错误','告警发送失败',exc)
    time.sleep(600)
本文为作者admin发布,未经允许禁止转载!
上一篇 下一篇
评论
暂无评论 >_<
加入评论