
typecho阅读量爬取
__author__ = 'jxl'
import requests
# 用于解决爬取的数据格式化
import io
import sys
import re
from datetime import datetime
class ShowProcess():
    """Draw a one-line text progress bar on standard output.

    The bar looks like ``[>>>>>-----]50.00%`` and is redrawn in place by
    ending every frame with a carriage return.
    """

    i = 0  # current step
    max_steps = 0  # total number of steps expected
    max_arrow = 50  # width of the bar in characters
    infoDone = 'done'  # message printed once the last step is reached

    def __init__(self, max_steps, infoDone='Done'):
        """Store the total number of steps and the completion message.

        Args:
            max_steps: Total number of steps that will be reported.
            infoDone: Text printed by ``close`` when the work is finished.
        """
        self.max_steps = max_steps
        self.i = 0
        self.infoDone = infoDone

    def show_process(self, i=None):
        """Advance the bar and redraw it.

        Args:
            i: Absolute step to display. When omitted the internal counter
                is incremented by one.
        """
        if i is not None:
            self.i = i
        else:
            self.i += 1
        if self.max_steps > 0:
            num_arrow = int(self.i * self.max_arrow / self.max_steps)
            percent = self.i * 100.0 / self.max_steps
        else:
            # Nothing to process: show a full bar instead of dividing by zero.
            num_arrow = self.max_arrow
            percent = 100.0
        # Keep the drawn bar inside its fixed width even if the caller
        # reports a step outside 0..max_steps.
        num_arrow = max(0, min(self.max_arrow, num_arrow))
        num_line = self.max_arrow - num_arrow
        # The trailing '\r' returns the cursor to column 0 without a newline.
        process_bar = ('[' + '>' * num_arrow + '-' * num_line + ']'
                       + '%.2f' % percent + '%' + '\r')
        sys.stdout.write(process_bar)
        sys.stdout.flush()
        if self.i >= self.max_steps:
            self.close()

    def close(self):
        """Finish the bar: move to a new line, print the message, reset."""
        print('')
        print(self.infoDone)
        self.i = 0
if __name__ == '__main__':
    # Walk archive ids 1..cishu-1 of a Typecho blog, add up the read counters
    # found on each page and push the totals to a WeCom group robot.
    cishu = 500
    process_bar = ShowProcess(cishu - 1, 'OK')
    url = 'http://blog.52i.xyz'
    s = 0  # total read count
    shu = 0  # number of read counters found
    # Browser-like User-Agent; it is now actually sent with every request.
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36'}
    for num in range(1, cishu):
        url1 = url + '/index.php/archives/' + str(num)
        try:
            r = requests.get(url1, headers=headers, timeout=10)
        except requests.RequestException as exc:
            # One unreachable page must not abort the whole crawl.
            print('请求失败: ' + url1, exc)
            continue
        # Let requests fall back to its detected encoding.
        r.encoding = None
        result = r.text
        salary_list = re.findall(r"([\d.]+) 次阅读", result)
        for salary in salary_list:
            try:
                salary1 = int(salary)
            except ValueError:
                # The pattern also accepts dots, e.g. "1.5" or ".".
                continue
            s = s + salary1
            # process_bar.show_process()
            print('文章阅读量:' + salary)
            shu = shu + 1
    s1 = str(s)
    Shu = str(shu)
    neirong = '博客: ' + url + '的文章数为:' + Shu + '\n 总阅读量为:' + s1
    print(neirong)
    # NOTE(review): the webhook key is a credential committed in source;
    # consider loading it from configuration instead.
    urll = 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=7b46ff77-d915-4231-a813-6c48dc48b607'
    headers = {'Content-type': 'application/json'}
    data = {
        "msgtype": "text",
        "text": {
            "content": neirong,  # message text
            "mentioned_list": ["@all"],  # who to @-mention
        }
    }
    r = requests.post(urll, headers=headers, json=data, timeout=10)
    print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "文本发送情况提示", r.text)
爬取typecho文章名
__author__ = 'jxl'
import requests
import re
# Print the <title> of every existing archive page of the blog.
cishu = 500
url = 'http://blog.52i.xyz'
# Browser-like User-Agent; it is now actually sent with every request.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36'}
for num in range(1, cishu):
    url1 = url + '/index.php/archives/' + str(num)
    try:
        r = requests.get(url1, headers=headers, timeout=10)
    except requests.RequestException as exc:
        # Skip pages that cannot be fetched instead of aborting the run.
        print('请求失败: ' + url1, exc)
        continue
    # Let requests fall back to its detected encoding.
    r.encoding = None
    result = r.text
    salaryt = re.search('<title>(.*)</title', result)
    # Print the captured title text rather than the Match object, and only
    # when the page exists and actually has a <title>.
    if r.status_code == 200 and salaryt is not None:
        print(salaryt.group(1))
五子棋
__author__ = 'jxl'
from tkinter import *
import math
#定义棋盘类
class chessBoard():
    """Top-level window holding the canvas on which the 15x15 grid is drawn."""

    def __init__(self):
        """Create the fixed-size window and canvas, then paint the grid."""
        self.window = root = Tk()
        root.title("五子棋游戏")
        root.geometry("660x470")
        root.resizable(0, 0)
        self.canvas = Canvas(root, bg="#EEE8AC", width=470, height=470)
        self.paint_board()
        self.canvas.grid(row=0, column=0)

    def paint_board(self):
        """Draw 15 horizontal and 15 vertical lines plus five marker dots.

        Lines are 30 px apart starting at pixel 25; the outermost lines are
        drawn with width 2, the inner ones with width 1.
        """
        draw_line = self.canvas.create_line
        near = 25
        far = 25 + 14 * 30
        for row in range(15):
            pos = 25 + row * 30
            thickness = 2 if row in (0, 14) else 1
            draw_line(near, pos, far, pos, width=thickness)
        for column in range(15):
            pos = 25 + column * 30
            thickness = 2 if column in (0, 14) else 1
            draw_line(pos, near, pos, far, width=thickness)
        # Marker dots: four corner points and the centre, radius 3 px.
        for cx, cy in ((115, 115), (355, 115), (115, 355), (235, 235), (355, 355)):
            self.canvas.create_oval(cx - 3, cy - 3, cx + 3, cy + 3, fill="black")
#定义五子棋游戏类
#0为黑子 , 1为白子 , 2为空位
class Gobang():
    """Two-player five-in-a-row game played on a ``chessBoard`` canvas.

    Cell values in ``self.db``: 0 = black stone, 1 = white stone, 2 = empty.
    ``flag_win == 1`` blocks moves (before "start" is pressed or after a win);
    ``flag_empty == 0`` means a finished game has not been cleared yet.
    """

    # (dy, dx) steps for the four lines through a cell: horizontal,
    # vertical and the two diagonals.
    _DIRECTIONS = ((0, 1), (1, 0), (1, 1), (1, -1))

    def __init__(self):
        """Build the board, reset the game state and start the UI loop."""
        self.board = chessBoard()
        self.game_print = StringVar()
        self.game_print.set("")
        # 16x16 although only 0..14 is playable; the spare row/column is
        # never written to.
        self.db = self._empty_grid()
        # Move history (x + 15 * y per move), used for undo.
        self.order = []
        self.color_count = 0
        self.color = 'black'
        self.flag_win = 1
        self.flag_empty = 1
        self.options()

    @staticmethod
    def _empty_grid():
        """Return a fresh 16x16 grid filled with the "empty" marker 2."""
        return [[2] * 16 for _ in range(16)]

    def _draw_stone(self, x, y, color):
        """Draw one stone of radius 12 px at board cell (x, y)."""
        cx, cy = 25 + 30 * x, 25 + 30 * y
        self.board.canvas.create_oval(cx - 12, cy - 12, cx + 12, cy + 12,
                                      fill=color, tags="chessman")

    def change_color(self):
        """Switch the side to move (0/black <-> 1/white)."""
        self.color_count = (self.color_count + 1) % 2
        if self.color_count == 0:
            self.color = "black"
        elif self.color_count == 1:
            self.color = "white"

    def chess_moving(self, event):
        """Handle a left click: place a stone on the nearest intersection.

        Args:
            event: Tk mouse event; only ``event.x`` and ``event.y`` are read.
        """
        # No moves until "start" is pressed, nor after a win until cleared.
        if self.flag_win == 1 or self.flag_empty == 0:
            return
        # Pixel coordinates -> nearest grid index.
        x = round((event.x - 25) / 30)
        y = round((event.y - 25) / 30)
        # Check the bounds BEFORE indexing so an off-board click can neither
        # raise IndexError nor wrap around through a negative index.
        if not self.limit_boarder(y, x) or self.db[y][x] != 2:
            return
        self.db[y][x] = self.color_count
        self.order.append(x + 15 * y)
        self._draw_stone(x, y, self.color)
        if self.game_win(y, x, self.color_count):
            print(self.color, "获胜")
            self.game_print.set(self.color + "获胜")
        else:
            self.change_color()
            self.game_print.set("请" + self.color + "落子")

    def limit_boarder(self, y, x):
        """Return True when (y, x) lies on the 15x15 playable area."""
        return 0 <= x <= 14 and 0 <= y <= 14

    def chessman_count(self, y, x, color_count):
        """Return the longest run of ``color_count`` stones through (y, x).

        The cell itself always counts as one; up to four neighbours are
        examined on each side along every direction in ``_DIRECTIONS``.
        """
        best = 0
        for dy, dx in self._DIRECTIONS:
            run = 1
            for sign in (-1, 1):
                for step in range(1, 5):
                    ny = y + sign * step * dy
                    nx = x + sign * step * dx
                    if not self.limit_boarder(ny, nx) or self.db[ny][nx] != color_count:
                        break
                    run += 1
            best = max(best, run)
        return best

    def game_win(self, y, x, color_count):
        """Return True and lock the board if the stone at (y, x) makes five."""
        if self.chessman_count(y, x, color_count) >= 5:
            self.flag_win = 1
            self.flag_empty = 0
            return True
        return False

    def withdraw(self):
        """Undo the last move by wiping all stones and replaying the rest."""
        if not self.order or self.flag_win == 1:
            return
        self.board.canvas.delete("chessman")
        last = self.order.pop()
        self.db[last // 15][last % 15] = 2
        # Start from "white" so the first toggle below yields black, which
        # always moves first.
        self.color_count = 1
        for move in self.order:
            self.change_color()
            self._draw_stone(move % 15, move // 15, self.color)
        # One more toggle gives the side whose turn it is now.
        self.change_color()
        self.game_print.set("请" + self.color + "落子")

    def empty_all(self):
        """Remove every stone and restore the initial game state."""
        self.board.canvas.delete("chessman")
        self.db = self._empty_grid()
        self.order = []
        self.color_count = 0
        self.color = 'black'
        self.flag_win = 1
        self.flag_empty = 1
        self.game_print.set("")

    def game_start(self):
        """Allow moves; ignored while a finished game has not been cleared."""
        if self.flag_empty == 0:
            return
        self.flag_win = 0
        self.game_print.set("请" + self.color + "落子")

    def options(self):
        """Bind the click handler, create the side-panel widgets, run Tk."""
        window = self.board.window
        self.board.canvas.bind("<Button-1>", self.chess_moving)
        Label(window, textvariable=self.game_print, font=("Arial", 20)).place(relx=0, rely=0, x=495, y=200)
        Button(window, text="开始游戏", command=self.game_start, width=13, font=("Verdana", 12)).place(relx=0, rely=0, x=495, y=15)
        Button(window, text="我要悔棋", command=self.withdraw, width=13, font=("Verdana", 12)).place(relx=0, rely=0, x=495, y=60)
        Button(window, text="清空棋局", command=self.empty_all, width=13, font=("Verdana", 12)).place(relx=0, rely=0, x=495, y=105)
        Button(window, text="结束游戏", command=window.destroy, width=13, font=("Verdana", 12)).place(relx=0, rely=0, x=495, y=420)
        # Blocks until the window is closed.
        window.mainloop()
if __name__ == "__main__":
    # Gobang.__init__ ends by calling options(), which runs the Tk main
    # loop, so this line returns only after the window is closed.
    game = Gobang()
存活主机扫描
__author__ = 'jxl'
import subprocess as p
import time
import threading
from queue import Queue
def check_ip(ip):
    """Ping ``ip`` twice and print ``<ip> is Up`` when a reply is seen.

    The check looks for the text ``TTL`` in the command output. ``-n`` is
    the packet-count flag of the Windows ``ping`` and the output is decoded
    as GBK, as in the original command line.

    Args:
        ip: Address to probe, as a string.
    """
    # An argument list with no shell means the address can never be
    # interpreted as shell syntax; run() also waits for the process and
    # drains both pipes.
    w = p.run(['ping', '-n', '2', ip], stdout=p.PIPE, stderr=p.PIPE,
              encoding='gbk', errors='replace')
    result = w.stdout
    if 'TTL' in result:
        print(ip, 'is Up')
def main():
    """Probe hosts .1 to .254 of one /24 network, one thread per address."""
    q = Queue()
    threads = []
    ips = '39.156.69.'  # network prefix to scan
    for i in range(1, 255):
        q.put(ips + str(i))
    # Start exactly one thread per queued address. The previous fixed count
    # of 255 was one more than the 254 queued items, so the final q.get()
    # blocked forever. This function is the only consumer, so empty() is
    # reliable here.
    while not q.empty():
        t = threading.Thread(target=check_ip, args=(q.get(),))
        t.start()
        threads.append(t)
        # Stagger the launches a little.
        time.sleep(0.2)
    for t in threads:
        t.join()
    print('all done')
if __name__ == '__main__':
    # Run the scan only when executed as a script, not when imported.
    main()
微信机器人
__author__ = 'jxl'
import requests
from datetime import datetime
# Send one fixed text message to a WeCom group robot and print the reply.
# NOTE(review): the webhook key is a credential committed in source;
# consider loading it from configuration instead.
url = 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=7b46ff77-d915-4231-a813-6c48dc48b607'
headers = {'Content-type': 'application/json'}
data = {
    "msgtype": "text",
    "text": {
        "content": ".......1234......",  # message text
        "mentioned_list": ["@all"],  # who to @-mention
    }
}
# A timeout keeps the script from hanging forever on a stalled connection.
r = requests.post(url, headers=headers, json=data, timeout=10)
print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "文本发送情况提示", r.text)
B站评论 词云
__author__ = 'jxl'
import requests
import parsel
import csv
import time
import jieba
import wordcloud
import imageio
# Download the danmaku history of one video for 20-30 November 2020, append
# every comment to a CSV file, then render a word cloud from that file.
# NOTE(review): the cookie below carries a logged-in session (SESSDATA);
# consider loading it from configuration instead of source.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
    "cookie": "__uuid=1896D3F7-4A98-54EB-F7FA-3301CE9EF5F307776infoc; buvid3=B68B2187-4C3E-4466-A896-FBF9B292099B190963infoc; LIVE_BUVID=AUTO4115757254257055; stardustvideo=1; rpdid=|(umu|ulY)JJ0J'ul~l~klRJ); sid=8cq4r229; im_notify_type_65901796=0; laboratory=1-1; DedeUserID=523606542; DedeUserID__ckMd5=909861ec223d26d8; blackside_state=1; CURRENT_FNVAL=80; SESSDATA=a976c0b4%2C1618637313%2C4d792*a1; bili_jct=7f54729ec20660f750661122b80746d2; PVID=1; bp_video_offset_523606542=458111639975213216; CURRENT_QUALITY=16; bfe_id=1e33d9ad1cb29251013800c68af42315"
}
# November has 30 days, so the last valid day is the 30th.
for page in range(20, 31):
    time.sleep(1)
    print('=================正在下载11月{}日弹幕===================================='.format(page))
    url = 'https://api.bilibili.com/x/v2/dm/history?type=1&oid=140610898&date=2020-11-{}'.format(page)
    response = requests.get(url=url, headers=headers, timeout=10)
    response.encoding = response.apparent_encoding
    selector = parsel.Selector(response.text)
    data = selector.css('d::text').getall()
    # Open the CSV once per page instead of once per comment.
    with open('B站弹幕.csv', mode='a', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        for i in data:
            print(i)
            writer.writerow([i])
# Read with the codec used for writing so the BOM is stripped, and close
# the handle when done.
with open('B站弹幕.csv', encoding='utf-8-sig') as f:
    txt = f.read()
# Segment the text into words with jieba and join them with spaces.
txt_list = jieba.lcut(txt)
string = ' '.join(txt_list)
# Word-cloud settings.
wc = wordcloud.WordCloud(
    width=800,  # image width
    height=500,  # image height
    background_color='white',  # background colour
    font_path='msyh.ttc',  # font file used for rendering
    # mask=py,
    scale=15,
)
# Feed the text to the word cloud.
wc.generate(string)
# Save the rendered image.
wc.to_file('1.png')
爬取微信公众号文章
# !/usr/bin/python
# -*- coding: UTF-8 -*-
import os
import requests
import xlsxwriter
from lxml import etree
# 请求微信文章的头部信息
# Request headers for the Sogou WeChat article list pages.
# The Accept value previously contained the artefact "*#zs#" where the
# standard wildcard media type "*/*" belongs.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Host': 'weixin.sogou.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
}
# Request headers for downloading the article images.
headers_images = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Host': 'img01.sogoucdn.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
}
# Crawl the Sogou WeChat article lists, download each article's cover image
# and export the collected titles to an Excel sheet and a text file.
a = 0  # running article counter; also used as the image file name
articles = []  # renamed from `all`, which shadowed the builtin
# Output folders (created if missing).
save_path = './微信文章'
os.makedirs(save_path, exist_ok=True)
images_path = '%s/图片' % save_path
os.makedirs(images_path, exist_ok=True)
for i in range(1, 9):
    for j in range(1, 5):
        url = "https://weixin.sogou.com/pcindex/pc/pc_%d/%d.html" % (i, j)
        # Decode the raw bytes as UTF-8 directly. The old
        # text.encode('iso-8859-1') round trip only worked while requests
        # guessed ISO-8859-1 and raised otherwise.
        response = requests.get(url=url, headers=headers, timeout=10).content.decode('utf-8')
        html = etree.HTML(response)
        # Each article is an <li> directly under <body>.
        xpath = html.xpath('/html/body/li')
        for content in xpath:
            a = a + 1
            title = content.xpath('./div[@class="txt-box"]/h3//text()')[0]
            article = {'title': title, 'id': '%d.jpg' % a}
            articles.append(article)
            # The src attribute is protocol-relative.
            path = 'http:' + content.xpath('./div[@class="img-box"]//img/@src')[0]
            try:
                # The download itself is now inside the try so that one bad
                # image does not abort the crawl.
                images = requests.get(url=path, headers=headers_images, timeout=10).content
                with open('%s/%d.jpg' % (images_path, a), "wb") as f:
                    print('正在下载第%d篇文章图片' % a)
                    f.write(images)
            except (requests.RequestException, OSError) as e:
                print('下载文章图片失败%s' % e)
# Store the collected records in an Excel workbook.
workbook = xlsxwriter.Workbook('%s/Excel格式.xlsx' % save_path)
worksheet = workbook.add_worksheet()
print('正在生成Excel...')
try:
    # Row 0 is the header row.
    worksheet.write(0, 0, 'title')
    worksheet.write(0, 1, 'id')
    for row, article in enumerate(articles, start=1):
        worksheet.write(row, 0, article['title'])
        worksheet.write(row, 1, article['id'])
    workbook.close()
except Exception as e:
    print('生成Excel失败%s' % e)
else:
    # Report success only when nothing failed.
    print("生成Excel成功")
print('正在生成txt...')
try:
    # Explicit UTF-8 so the titles can be written whatever the platform
    # default encoding is.
    with open('%s/数组格式.txt' % save_path, "w", encoding='utf-8') as f:
        f.write(str(articles))
except OSError as e:
    print('生成txt失败%s' % e)
else:
    print('生成txt成功')
print('共爬取%d篇文章' % a)
哈夫曼编码器
# 树节点类构建
class TreeNode(object):
    """Node of a Huffman tree.

    Attributes are taken from a two-item sequence: ``data[0]`` becomes
    ``val`` and ``data[1]`` becomes ``priority``. Children start as None
    and ``code`` as the empty string.
    """

    def __init__(self, data):
        self.val, self.priority = data[0], data[1]
        self.leftChild = self.rightChild = None
        self.code = ""
# 创建树节点队列函数
def creatnodeQ(codes):
    """Return a list with one ``TreeNode`` per item of ``codes``, in order."""
    return [TreeNode(entry) for entry in codes]
# Insert a node into the queue while keeping priorities in ascending order (smallest first)
def addQ(queue, nodeNew):
    """Return a new list with ``nodeNew`` inserted by ascending priority.

    The node goes in front of the first element whose priority is greater
    than or equal to its own, or at the end when there is none. ``queue``
    itself is not modified.
    """
    if not len(queue):
        return [nodeNew]
    position = len(queue)
    for index, existing in enumerate(queue):
        if existing.priority >= nodeNew.priority:
            position = index
            break
    return queue[:position] + [nodeNew] + queue[position:]
# 节点队列类定义
class nodeQeuen(object):
    """Queue of tree nodes with an explicit ``size`` counter.

    ``que`` holds the nodes and ``size`` is kept in step with it by
    ``addNode`` and ``popNode``.
    """

    def __init__(self, code):
        """Build the queue from ``code`` via ``creatnodeQ``."""
        nodes = creatnodeQ(code)
        self.que = nodes
        self.size = len(nodes)

    def addNode(self, node):
        """Insert ``node`` through ``addQ`` and bump the size."""
        updated = addQ(self.que, node)
        self.que = updated
        self.size += 1

    def popNode(self):
        """Remove and return the node at the front of the queue."""
        self.size -= 1
        front = self.que.pop(0)
        return front
# 各个字符在字符串中出现的次数,即计算优先度
def freChar(string):
    """Count the characters of ``string``.

    Returns:
        A list of ``(character, count)`` tuples sorted by ascending count;
        characters with equal counts stay in order of first appearance.
    """
    counts = {}
    for ch in string:
        counts[ch] = counts.get(ch, 0) + 1
    pairs = list(counts.items())
    pairs.sort(key=lambda pair: pair[1])
    return pairs
# 创建哈夫曼树
def creatHuffmanTree(nodeQ):
    """Merge the queued nodes into one Huffman tree and return its root.

    The two front nodes are popped repeatedly and re-inserted under a new
    parent whose priority is their sum, until one node is left.

    Args:
        nodeQ: Queue object exposing ``size``, ``popNode()`` and
            ``addNode(node)``.

    Returns:
        The root node, or None when the queue is empty. Previously an
        empty queue made ``popNode`` fail on an empty list.
    """
    if nodeQ.size == 0:
        return None
    while nodeQ.size > 1:
        node1 = nodeQ.popNode()
        node2 = nodeQ.popNode()
        # Internal nodes carry no symbol, only the combined priority.
        r = TreeNode([None, node1.priority + node2.priority])
        r.leftChild = node1
        r.rightChild = node2
        nodeQ.addNode(r)
    return nodeQ.popNode()
# Module-level code tables filled by HuffmanCodeDic:
# codeDic1 maps symbol -> code, codeDic2 maps code -> symbol.
codeDic1 = {}
codeDic2 = {}


def HuffmanCodeDic(head, x):
    """Walk the tree in order and record the code of every symbol node.

    A left edge appends '0' and a right edge appends '1'. The results are
    written into the module-level ``codeDic1`` and ``codeDic2``; entries
    from earlier calls are not removed.

    Args:
        head: Current node (None ends the recursion).
        x: Code prefix accumulated on the way down; '' for the root.
    """
    if head is None:
        return
    HuffmanCodeDic(head.leftChild, x + '0')
    # Assign rather than append, so a second walk over the same tree does
    # not double the stored codes.
    head.code = x
    if head.val is not None:
        if not x:
            # A tree made of a single symbol would otherwise get the empty
            # code, which encodes every input to '' and cannot be decoded.
            head.code = '0'
        codeDic2[head.code] = head.val
        codeDic1[head.val] = head.code
    HuffmanCodeDic(head.rightChild, x + '1')
# 字符串编码
def TransEncode(string):
    """Return the concatenated ``codeDic1`` codes of every character.

    A character missing from ``codeDic1`` raises ``KeyError``.
    """
    return "".join(codeDic1[ch] for ch in string)
# 字符串解码
def TransDecode(StringCode):
    """Decode a code string back to text using ``codeDic2``.

    Characters are accumulated until the pending prefix is a key of
    ``codeDic2``; the matching symbol is emitted and the prefix reset. A
    trailing prefix that matches nothing is dropped.
    """
    symbols = []
    pending = ""
    for bit in StringCode:
        pending += bit
        if pending in codeDic2:
            symbols.append(codeDic2[pending])
            pending = ""
    return "".join(symbols)
# 举例
# Demo: build the code table for a sample string, encode it, decode it
# again and check that the round trip gives back the input.
string = "saasdasEFKfjsdf(msd3lsdlfj"
t = nodeQeuen(freChar(string))
tree = creatHuffmanTree(t)
HuffmanCodeDic(tree, '')
print('树结构')
print(codeDic1)  # symbol -> code table
a = TransEncode(string)  # encoded form
print('加密:' + a)
aa = TransDecode(a)  # decoded form
print('解密' + aa)
print(string == aa)
网易云热评
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
import urllib.request
import urllib.error
import urllib.parse
import json
def get_all_hotSong():
    """Fetch the NetEase hot-song chart and extract song names and ids.

    Returns:
        A pair ``(hot_song_name, hot_song_id)`` of lists of strings. Both
        are empty when the song list cannot be found in the page.
    """
    url = 'http://music.163.com/discover/toplist?id=3778678'  # hot-song chart
    header = {
        'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
    }
    request = urllib.request.Request(url=url, headers=header)
    html = urllib.request.urlopen(request, timeout=10).read().decode('utf8')
    # The patterns had lost their backslashes: "?" must be escaped to match
    # the literal question mark, and the id is matched with \d, not the
    # letter "d". Without them nothing matched and result[0] raised.
    pat1 = r'<ul class="f-hide"><li><a href="/song\?id=\d*?">.*</a></li></ul>'
    result = re.compile(pat1).findall(html)
    if not result:
        return [], []
    result = result[0]  # the whole hidden <ul> holding the chart
    pat2 = r'<li><a href="/song\?id=\d*?">(.*?)</a></li>'  # song name
    pat3 = r'<li><a href="/song\?id=(\d*?)">.*?</a></li>'  # song id
    hot_song_name = re.compile(pat2).findall(result)
    hot_song_id = re.compile(pat3).findall(result)
    return hot_song_name, hot_song_id
def get_hotComments(hot_song_name, hot_song_id):
    """Fetch the hot comments of one song and append them to a file.

    The comments are appended to ``./song_comments`` as a numbered list
    under the song name, followed by a separator line.

    Args:
        hot_song_name: Song title, written as the section heading.
        hot_song_id: Song id as a string, used to build the request URL.
    """
    url = 'http://music.163.com/weapi/v1/resource/comments/R_SO_4_' + hot_song_id + '?csrf_token='
    header = {
        'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
    }
    # Form data for the POST request.
    data = {'params': 'zC7fzWBKxxsm6TZ3PiRjd056g9iGHtbtc8vjTpBXshKIboaPnUyAXKze+KNi9QiEz/IieyRnZfNztp7yvTFyBXOlVQP/JdYNZw2+GRQDg7grOR2ZjroqoOU2z0TNhy+qDHKSV8ZXOnxUF93w3DA51ADDQHB0IngL+v6N8KthdVZeZBe0d3EsUFS8ZJltNRUJ', 'encSecKey': '4801507e42c326dfc6b50539395a4fe417594f7cf122cf3d061d1447372ba3aa804541a8ae3b3811c081eb0f2b71827850af59af411a10a1795f7a16a5189d163bc9f67b3d1907f5e6fac652f7ef66e5a1f12d6949be851fcf4f39a0c2379580a040dc53b306d5c807bf313cc0e8f39bf7d35de691c497cda1d436b808549acc'}
    postdata = urllib.parse.urlencode(data).encode('utf8')
    request = urllib.request.Request(url, headers=header, data=postdata)
    reponse = urllib.request.urlopen(request, timeout=10).read().decode('utf8')
    json_dict = json.loads(reponse)
    # A reply without the key is treated as "no hot comments".
    hot_commit = json_dict.get('hotComments', [])
    # The line breaks had lost their backslashes and were written as the
    # letter "n". The file is opened with an explicit encoding and is
    # closed even if a write fails.
    with open('./song_comments', 'a', encoding='utf-8') as fhandle:
        fhandle.write(hot_song_name + ':' + '\n')
        for num, item in enumerate(hot_commit, start=1):
            fhandle.write(str(num) + '.' + item['content'] + '\n')
        fhandle.write('\n==============================================\n\n')
# Fetch the chart once, then save the hot comments of every song on it.
hot_song_name, hot_song_id = get_all_hotSong()
num = 0
for song_name in hot_song_name:
    num += 1  # 1-based position, used in the progress messages
    print('正在抓取第%d首歌曲热评...' % num)
    get_hotComments(song_name, hot_song_id[num - 1])
    print('第%d首歌曲热评抓取成功' % num)
插画素材
import ssl
import os
import urllib.request
import requests
from bs4 import BeautifulSoup
# Download every image found in the listing page into ./插画素材/.
# NOTE(review): this switches off TLS certificate verification for all
# urllib HTTPS requests in the process; kept because the downloads below
# rely on it, but confirm it is really needed.
ssl._create_default_https_context = ssl._create_unverified_context
headers = {
    'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36',
}
os.makedirs('./插画素材/', exist_ok=True)
url = 'https://www.tupianzj.com/meinv/mm/meizitu/'
html = requests.get(url, headers=headers, timeout=10).text
soup = BeautifulSoup(html, 'lxml')
listing = soup.find('ul', class_='d1 ico3')
# find() returns None when the list is missing from the page.
images_data = listing.find_all_next('li') if listing is not None else []
for image in images_data:
    for img in image.find_all('img'):
        src = img.get('src')
        alt = img.get('alt')
        if not src or not alt:
            # Both the address and the file name are required.
            continue
        print(src, alt)
        try:
            urllib.request.urlretrieve(src, './插画素材/' + alt + '.jpg')
        except (OSError, ValueError) as exc:
            # Network and file errors are OSError; a malformed URL raises
            # ValueError. Report the failure and carry on with the next
            # image instead of hiding it.
            print('下载失败', src, exc)
心跳检测机器人
__author__ = 'jxl'
from datetime import datetime
import requests
import sys, time
# Poll one site every 10 minutes and alert a WeCom group robot whenever it
# does not answer with HTTP 200.
url = 'http://106.13.164.199'  # site under test
while True:
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36'}
    try:
        r = requests.get(url, headers=headers, timeout=10)
        site_ok = r.status_code == 200
    except requests.RequestException:
        # An unreachable server raises instead of returning a status code;
        # that is exactly the case that must trigger the alert.
        site_ok = False
    if not site_ok:
        print('网页错误')
        # NOTE(review): the webhook key is a credential committed in source.
        url1 = 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=7b46ff77-d915-4231-a813-6c48dc48b607'
        headers = {'Content-type': 'application/json'}
        data = {
            "msgtype": "text",
            "text": {
                "content": ".......WARING......",  # message text
                "mentioned_list": ["@all"],  # who to @-mention
            }
        }
        try:
            r = requests.post(url1, headers=headers, json=data, timeout=10)
            print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), url + '服务器错误', r.text)
        except requests.RequestException as exc:
            # Keep monitoring even if the alert itself cannot be sent.
            print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), url + '服务器错误', exc)
    time.sleep(600)