您当前的位置:首页 >> 新能源
新能源

Python可视化分析《雪中悍刀行》,看看它为什么这么火?

发布时间:2025-09-17

# NOTE(review): this line begins part-way through an earlier script whose start
# is outside this view. Its visible tail is preserved verbatim below, as a
# comment, because it cannot be parsed on its own:
#
#   nge(page_num): params = {'orinum': '10', 'cursor': cid} html = get_html(url, params) cid = parse_page(infolist, html) print_comment_list(infolist) save_to_txt(infolist, 'content.txt')main()
#
# 2. Scraping the comment timestamps -- sp.py
import random
import re

# User-Agent strings; one is picked at random for every request.
_USER_AGENTS = [
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14',
]

# Compiled once instead of on every page.
_TIME_RE = re.compile('"time":"(.*?)"', re.S)
_LAST_RE = re.compile('"last":"(.*?)"')


def get_html(url, params):
    """Fetch ``url`` with query ``params`` and return the body decoded as UTF-8.

    Raises whatever ``requests`` raises for connection problems, timeouts and
    non-2xx responses (``raise_for_status``).
    """
    # Third-party dependency, imported here so the parsing and saving helpers
    # stay usable without it.
    import requests

    headers = {"User-Agent": random.choice(_USER_AGENTS)}
    # A timeout keeps one stalled connection from hanging the whole crawl.
    r = requests.get(url, headers=headers, params=params, timeout=10)
    r.raise_for_status()
    # Force UTF-8 (the original comment on this line presumably warned about
    # garbled text without it). The earlier assignment from
    # ``apparent_encoding`` was immediately overwritten, so it is dropped.
    r.encoding = 'utf-8'
    return r.text


def parse_page(infolist, data):
    """Append the page's ``"time"`` values to ``infolist``; return the next cursor.

    Args:
        infolist: list that receives one list of matched strings per page.
        data: raw response text.

    Returns:
        The value of the first ``"last":"..."`` field, or ``None`` when the
        page has none, so callers can stop instead of crashing.
    """
    infolist.append(_TIME_RE.findall(data))
    match = _LAST_RE.search(data)
    return match.group(1) if match else None


def print_comment_list(infolist):
    """Print every collected value, preceded by a 1-based page header."""
    for page_no, page in enumerate(infolist, start=1):
        print('第' + str(page_no) + '页')
        for item in page:
            print(item)


def save_to_txt(infolist, path):
    """Write every collected value to ``path``, one value per line (UTF-8).

    One value per line is what the follow-up conversion script needs: it
    iterates over the file and calls ``int(line)`` on each line.
    """
    with open(path, 'w', encoding='utf-8') as fw:
        for page in infolist:
            fw.writelines(item + '\n' for item in page)


def main():
    """Crawl up to ``page_num`` pages of timestamps and save them to time.txt."""
    infolist = []
    vid = '7579013546'
    cid = "0"
    page_num = 3000
    # NOTE(review): the base URL literal is empty in this copy of the script
    # (it looks stripped during extraction); fill it in before running.
    url = '' + vid + '/comment/v2'
    for _ in range(page_num):
        params = {'orinum': '10', 'cursor': cid}
        html = get_html(url, params)
        cid = parse_page(infolist, html)
        if cid is None:
            # No further cursor: keep what was collected and stop.
            break
    print_comment_list(infolist)
    save_to_txt(infolist, 'time.txt')


if __name__ == "__main__":
    main()

# II. Data processing
# 1. (this heading continues on the next line of the file)
# (heading continued from the previous line) Converting the comment
# timestamps to human-readable times -- time.py
import csv
import time
from collections import Counter


def convert_timestamps(src="time.txt", dst="data.csv"):
    """Convert one Unix timestamp per line of ``src`` into CSV rows in ``dst``.

    Each row has two cells, the local date (``%Y-%m-%d``) and the local time
    (``%H:%M:%S``). Blank lines are skipped instead of crashing ``int()``.
    """
    with open(src, 'r', encoding='utf-8') as f:
        with open(dst, 'w', newline='', encoding='utf-8') as csv_file:
            writer = csv.writer(csv_file)
            for line in f:
                line = line.strip()
                if not line:
                    continue
                stamp = time.strftime("%Y-%m-%d %H:%M:%S",
                                      time.localtime(int(line)))
                print(stamp)
                # Splitting on the space yields the [date, time] cells.
                writer.writerow(stamp.split())


# 2. Loading the comment text into a CSV file -- CD.py
def content_to_csv(src="content.txt", dst="content.csv"):
    """Copy ``src`` to ``dst`` as CSV, one row per line, split on whitespace."""
    with open(src, 'r', encoding='utf-8') as f:
        with open(dst, 'w', newline='', encoding='utf-8') as csv_file:
            writer = csv.writer(csv_file)
            for line in f:
                writer.writerow(line.split())


# 3. Counting the comments in each hour of the day -- py.py
def count_occurrences(values):
    """Return ``(value, count)`` pairs sorted by value.

    Uses a single pass with ``Counter`` instead of calling ``list.count``
    once per distinct value.
    """
    return sorted(Counter(values).items())


def count_comments_by_hour(src='../Spiders/data.csv', dst="time2.csv"):
    """Count rows of ``src`` per hour and write ``hour,count`` rows to ``dst``.

    The hour is the first two characters of the second column.

    Returns:
        ``(x, y1)``: the hour labels and their counts as floats, in sorted
        hour order (the same values the original printed after re-reading
        ``dst``).
    """
    with open(src, encoding='utf-8') as csvfile:
        hours = [str(row[1])[0:2] for row in csv.reader(csvfile)]
    print(hours)
    rst = count_occurrences(hours)
    print(rst)
    with open(dst, "w", newline='', encoding='utf-8') as f:
        csv.writer(f, delimiter=',').writerows(rst)
    x = [str(hour) for hour, _ in rst]
    y1 = [float(count) for _, count in rst]
    print(x)
    print(y1)
    return x, y1


if __name__ == "__main__":
    # The three scripts above are separate files in the article; they are run
    # here in the order they are presented.
    # NOTE(review): time.py writes data.csv to the working directory while
    # py.py reads ../Spiders/data.csv -- confirm the intended layout.
    convert_timestamps()
    content_to_csv()
    count_comments_by_hour()

# 4. (this heading continues on the next line of the file)
# (heading continued from the previous line) Counting the comments per
# date -- py1.py
import csv
from collections import Counter


def count_occurrences(values):
    """Return ``(value, count)`` pairs sorted by value.

    Uses a single pass with ``Counter`` instead of calling ``list.count``
    once per distinct value.
    """
    return sorted(Counter(values).items())


def count_comments_by_date(src='../Spiders/data.csv', dst="time1.csv"):
    """Count rows of ``src`` per first-column value; write the counts to ``dst``.

    Returns:
        ``(x, y1)``: the first-column values and their counts as floats, in
        sorted order (the same values the original printed after re-reading
        ``dst``).
    """
    with open(src, encoding='utf-8') as csvfile:
        dates = [str(row[0]) for row in csv.reader(csvfile)]
    rst = count_occurrences(dates)
    print(rst)
    with open(dst, "w", newline='', encoding='utf-8') as f:
        csv.writer(f, delimiter=',').writerows(rst)
    x = [str(date) for date, _ in rst]
    y1 = [float(count) for _, count in rst]
    print(x)
    print(y1)
    return x, y1


if __name__ == "__main__":
    count_comments_by_date()

# III. Data analysis

数据分析方面:涉及到了词云图、条形图、折线图、饼图,后三者是对评论时间与主演占比的分析。然而腾讯视频的评论时间是以时间戳的形式显示,所以要先进行转换,再去统计出现次数。最后,新加了对评论内容的情感分析。

1. 制作词云图

wc.py

import re

# Characters removed before building the word cloud: ASCII letters and digits
# plus a few punctuation marks. The original pattern left '[' unescaped, which
# closed the character class early, so it only matched a character followed by
# the literal text ",。]" and stripped essentially nothing.
_NOISE_RE = re.compile(r"[A-Za-z0-9!%\[\],,。]")


def clean_text(txt):
    """Return ``txt`` with letters, digits and ``! % [ ] , , 。`` removed."""
    return _NOISE_RE.sub("", txt)


def main():
    """Build a word cloud from content.txt, show it and save it as wc.png."""
    # Third-party dependencies are imported here so ``clean_text`` can be
    # used without them.
    import jieba
    import numpy as np
    from matplotlib import pyplot as plt
    from PIL import Image
    from wordcloud import WordCloud

    # content.txt is the data source for the word cloud.
    with open('content.txt', 'r', encoding='utf-8') as f:
        txt = f.read()

    newtxt = clean_text(txt)
    print(newtxt)
    words = jieba.lcut(newtxt)

    # wc.jpg supplies the mask, i.e. the outline the cloud is drawn into.
    img_array = np.array(Image.open(r'wc.jpg'))

    # NOTE(review): the font file name below may have been altered during
    # extraction of the article; verify it against the actual font file.
    wordcloud = WordCloud(
        background_color="white",
        width=1080,
        height=960,
        font_path="../文悦新的青年.otf",
        max_words=150,
        scale=10,
        max_font_size=100,
        mask=img_array,
        collocations=False,
    # The original segmented the text with jieba but then passed the
    # unsegmented string; joining the tokens with spaces is what lets
    # WordCloud see individual words.
    ).generate(' '.join(words))

    plt.imshow(wordcloud)
    plt.axis('off')
    plt.show()
    wordcloud.to_file('wc.png')


if __name__ == "__main__":
    main()

轮廓图:wc.jpg

词云图:result.png(注:这里把英文字母过滤掉)

2. 制作最近评论数条形图

DrawBar.py

# encoding: utf-8
import csv

import pyecharts.options as opts
from pyecharts.charts import Bar
from pyecharts.globals import ThemeType


class DrawBar(object):
    """Bar chart built from ``time1.csv``.

    The first CSV column supplies the x-axis labels and the second column the
    bar heights.
    """

    def __init__(self):
        """Create the Bar object with a fixed size and the LIGHT theme."""
        self.bar = Bar(init_opts=opts.InitOpts(width='1500px', height='700px', theme=ThemeType.LIGHT))

    def add_x(self):
        """Add the x-axis labels (first column of time1.csv)."""
        with open('time1.csv') as csvfile:
            reader = csv.reader(csvfile)
            x = [str(row[0]) for row in reader]
            print(x)

        self.bar.add_xaxis(
            xaxis_data=x,
        )

    def add_y(self):
        """Add the y-axis series (second column of time1.csv, as floats)."""
        with open('time1.csv') as csvfile:
            reader = csv.reader(csvfile)
            y1 = [float(row[1]) for row in reader]
            print(y1)

        self.bar.add_yaxis(
            # NOTE(review): the series name was lost in this copy of the
            # script (only ``series_,`` survived); an empty name is used, as
            # in the article's pie-chart scripts. Restore the real name if
            # it is known.
            series_name='',
            y_axis=y1,
            label_opts=opts.LabelOpts(is_show=True, color="black"),  # value labels on the bars
            bar_max_width='100px',  # upper bound on bar width
        )

    def set_global(self):
        """Set the title, tooltip and toolbox options."""
        self.bar.set_global_opts(
            title_opts=opts.TitleOpts(
                title='雪里悍刀行近来评论家统计研究',
                title_textstyle_opts=opts.TextStyleOpts(font_size=35),
            ),
            tooltip_opts=opts.TooltipOpts(
                is_show=True,
                trigger="axis",  # tooltip is triggered by the axis
                axis_pointer_type="cross",  # crosshair pointer
            ),
            toolbox_opts=opts.ToolboxOpts(),  # toolbox with its default options
        )

    def draw(self):
        """Populate the chart and render it to ../Html/DrawBar.html."""
        self.add_x()
        self.add_y()
        self.set_global()
        self.bar.render('../Html/DrawBar.html')

    def run(self):
        """Entry point: draw the chart."""
        self.draw()


if __name__ == '__main__':
    app = DrawBar()
    app.run()

效果图:DrawBar.html

3. 制作每小时评论条形图

DrawBar2.py

# encoding: utf-8
import csv

import pyecharts.options as opts
from pyecharts.charts import Bar
from pyecharts.globals import ThemeType


class DrawBar(object):
    """Bar chart built from ``time2.csv``.

    The first CSV column (with '点' appended) supplies the x-axis labels and
    the second column the bar heights.
    """

    def __init__(self):
        """Create the Bar object with a fixed size and the MACARONS theme."""
        self.bar = Bar(init_opts=opts.InitOpts(width='1500px', height='700px', theme=ThemeType.MACARONS))

    def add_x(self):
        """Add the x-axis labels (first column of time2.csv plus '点')."""
        str_name1 = '点'
        with open('time2.csv') as csvfile:
            reader = csv.reader(csvfile)
            x = [str(row[0] + str_name1) for row in reader]
            print(x)

        self.bar.add_xaxis(
            xaxis_data=x
        )

    def add_y(self):
        """Add the y-axis series (second column of time2.csv, as ints)."""
        with open('time2.csv') as csvfile:
            reader = csv.reader(csvfile)
            y1 = [int(row[1]) for row in reader]
            print(y1)

        self.bar.add_yaxis(
            # NOTE(review): the series name was lost in this copy of the
            # script (only ``series_,`` survived); an empty name is used, as
            # in the article's pie-chart scripts. Restore the real name if
            # it is known.
            series_name='',
            y_axis=y1,
            label_opts=opts.LabelOpts(is_show=False),  # no value labels on the bars
            bar_max_width='50px',  # upper bound on bar width
        )

    def set_global(self):
        """Set the title, tooltip and toolbox options."""
        self.bar.set_global_opts(
            title_opts=opts.TitleOpts(
                title='雪里悍刀行各不间断评论家统计研究',
                title_textstyle_opts=opts.TextStyleOpts(font_size=35),
            ),
            tooltip_opts=opts.TooltipOpts(
                is_show=True,
                trigger="axis",  # tooltip is triggered by the axis
                axis_pointer_type="cross",  # crosshair pointer
            ),
            toolbox_opts=opts.ToolboxOpts(),  # toolbox with its default options
        )

    def draw(self):
        """Populate the chart and render it to ../Html/DrawBar2.html."""
        self.add_x()
        self.add_y()
        self.set_global()
        self.bar.render('../Html/DrawBar2.html')

    def run(self):
        """Entry point: draw the chart."""
        self.draw()


if __name__ == '__main__':
    app = DrawBar()
    app.run()

效果图:DrawBar2.html

# 4. Pie charts of the counts in time1.csv -- pie_pyecharts.py
import csv
from pyecharts import options as opts
from pyecharts.charts import Pie
from random import randint
from pyecharts.globals import ThemeType

# Slice labels: first column of time1.csv.
with open('time1.csv') as label_file:
    labels = [str(record[0]) for record in csv.reader(label_file)]
    print(labels)

# Slice values: second column of time1.csv, as floats.
with open('time1.csv') as value_file:
    values = [float(record[1]) for record in csv.reader(value_file)]
    print(values)


def label_value_pairs():
    """Return a fresh list of (label, value) pairs for one series."""
    return [(label, value) for value, label in zip(values, labels)]


# The original comment mentions 900x600, presumably the default canvas size.
chart = Pie(init_opts=opts.InitOpts(width='1700px', height='450px', theme=ThemeType.LIGHT))
chart.set_global_opts(
    title_opts=opts.TitleOpts(
        title="雪里悍刀行近来评论家统计研究",
        title_textstyle_opts=opts.TextStyleOpts(font_size=27),
    ),
    # Legend position.
    legend_opts=opts.LegendOpts(pos_top="10%", pos_left="1%"),
)
# Three renderings of the same data, side by side: pie, ring, rose.
chart.add(series_name='', center=[280, 270], data_pair=label_value_pairs())
chart.add(series_name='', center=[845, 270], data_pair=label_value_pairs(), radius=['40%', '75%'])
chart.add(series_name='', center=[1380, 270], data_pair=label_value_pairs(), rosetype='radius')
chart.render('pie_pyecharts.html')

效果图

5. 制作每小时评论饼图

pie_pyecharts2.py

import csv
from pyecharts import options as opts
from pyecharts.charts import Pie
from random import randint
from pyecharts.globals import ThemeType

# Suffix appended to every label read from the first CSV column.
str_name1 = '点'

# Slice labels: first column of time2.csv plus the suffix.
with open('time2.csv') as label_file:
    labels = [str(record[0] + str_name1) for record in csv.reader(label_file)]
    print(labels)

# Slice values: second column of time2.csv, as ints.
with open('time2.csv') as value_file:
    values = [int(record[1]) for record in csv.reader(value_file)]
    print(values)


def label_value_pairs():
    """Return a fresh list of (label, value) pairs for one series."""
    return [(label, value) for value, label in zip(values, labels)]


# The original comment mentions 900x600, presumably the default canvas size.
chart = Pie(init_opts=opts.InitOpts(width='1650px', height='500px', theme=ThemeType.LIGHT,))
chart.set_global_opts(
    title_opts=opts.TitleOpts(
        title="雪里悍刀行每天内评论家统计研究",
        title_textstyle_opts=opts.TextStyleOpts(font_size=27),
    ),
    # Legend position.
    legend_opts=opts.LegendOpts(pos_top="8%", pos_left="4%"),
)
# Three renderings of the same data, side by side: pie, ring, rose.
chart.add(series_name='', center=[250, 300], data_pair=label_value_pairs())
chart.add(series_name='', center=[810, 300], data_pair=label_value_pairs(), radius=['40%', '75%'])
chart.add(series_name='', center=[1350, 300], data_pair=label_value_pairs(), rosetype='radius')
chart.render('pie_pyecharts2.html')

效果图

6. 制作观看时间区间评论统计饼图

pie_pyecharts3.py

import csv
from collections import Counter

# The day is split into four 6-hour periods (00-05, 06-11, 12-17, 18-23).
# The original label list repeated '下午' and so could not tell the periods
# apart in the legend.
PERIOD_LABELS = ['凌晨', '上午', '下午', '晚上']
HOURS_PER_PERIOD = 6


def totals_by_period(hour_counts):
    """Sum ``(hour, count)`` pairs into one total per 6-hour period.

    Args:
        hour_counts: iterable of ``(hour, count)`` with ``hour`` in 0..23.

    Returns:
        A list of four totals, ordered like ``PERIOD_LABELS``.

    Bucketing by ``hour // 6`` (rather than slicing the rows by position)
    keeps the periods aligned even when some hours have no rows at all.
    """
    totals = [0] * len(PERIOD_LABELS)
    for hour, count in hour_counts:
        totals[hour // HOURS_PER_PERIOD] += count
    return totals


def main():
    """Count comments per hour, write time2.csv and render the period pies."""
    # Third-party dependency, imported here so ``totals_by_period`` can be
    # used without it.
    from pyecharts import options as opts
    from pyecharts.charts import Pie
    from pyecharts.globals import ThemeType

    # NOTE(review): the path literal is damaged in this copy of the script
    # (``open(/data.csv')``); '../Spiders/data.csv' is the path the article's
    # other counting scripts read -- confirm.
    with open('../Spiders/data.csv') as csvfile:
        # Hour = first two characters of the second column.
        hours = [int(row[1][0:2]) for row in csv.reader(csvfile)]

    hour_counts = sorted(Counter(hours).items())

    with open("time2.csv", "w", newline='', encoding='utf-8') as f:
        csv.writer(f, delimiter=',').writerows(hour_counts)

    for index, label in enumerate(PERIOD_LABELS):
        in_period = [pair for pair in hour_counts
                     if pair[0] // HOURS_PER_PERIOD == index]
        print(label + " : ", in_period)

    # The original summing loop rebound its own loop variables and ended up
    # charting the last six hourly counts instead of the four period totals.
    totals = totals_by_period(hour_counts)
    print(totals)
    data_pair = list(zip(PERIOD_LABELS, totals))

    # The original comment mentions 900x600, presumably the default canvas size.
    chart = Pie(init_opts=opts.InitOpts(width='1500px', height='450px', theme=ThemeType.LIGHT))
    chart.set_global_opts(
        title_opts=opts.TitleOpts(
            title="雪里悍刀行进场整整下行评论家统计研究",
            title_textstyle_opts=opts.TextStyleOpts(font_size=30),
        ),
        # Legend position.
        legend_opts=opts.LegendOpts(pos_top="8%"),
    )
    # Three renderings of the same data: pie, ring, rose.
    chart.add(series_name='', center=[260, 270], data_pair=list(data_pair))
    chart.add(series_name='', center=[1230, 270], data_pair=list(data_pair), radius=['40%', '75%'])
    chart.add(series_name='', center=[750, 270], data_pair=list(data_pair), rosetype='radius')
    chart.render('pie_pyecharts3.html')


if __name__ == "__main__":
    main()

效果图

7. 制作雪中悍刀行主演提及占比饼图

pie_pyecharts4.py

import csv
from pyecharts import options as opts
from pyecharts.charts import Pie
from random import randint
from pyecharts.globals import ThemeType

# content.txt holds the text in which the names are counted.
with open('content.txt', 'r', encoding='utf-8') as source:
    words = source.read()

name = ["张若昀", "李庚希", "吕良伟"]
print(name)

# Number of occurrences of each name in the text, as floats.
count = [float(words.count(person)) for person in name]
print(count)


def label_value_pairs():
    """Return a fresh list of (name, count) pairs for one series."""
    return [(label, value) for value, label in zip(count, name)]


# The original comment mentions 900x600, presumably the default canvas size.
chart = Pie(init_opts=opts.InitOpts(width='1650px', height='450px', theme=ThemeType.LIGHT))
chart.set_global_opts(
    title_opts=opts.TitleOpts(
        title="雪里悍刀行合演提及占比",
        title_textstyle_opts=opts.TextStyleOpts(font_size=27),
    ),
    # Legend position.
    legend_opts=opts.LegendOpts(pos_top="3%", pos_left="33%"),
)
# Three renderings of the same data, side by side: pie, ring, rose.
chart.add(series_name='', center=[280, 270], data_pair=label_value_pairs())
chart.add(series_name='', center=[800, 270], data_pair=label_value_pairs(), radius=['40%', '75%'])
chart.add(series_name='', center=[1300, 270], data_pair=label_value_pairs(), rosetype='radius')
chart.render('pie_pyecharts4.html')

效果图

8. 评论内容情感分析

SnowNLP.py

import numpy as np


def sentiment_bins(step=0.01):
    """Return histogram bin edges that cover the closed interval [0, 1].

    The original used ``np.arange(0, 1, 0.01)``, whose last edge is 0.99, so
    every score above 0.99 fell outside the histogram.
    """
    return np.linspace(0.0, 1.0, int(round(1 / step)) + 1)


def main():
    """Score every line of content.txt with SnowNLP and plot a histogram."""
    # Third-party dependencies are imported here so ``sentiment_bins`` can be
    # used without them.
    import matplotlib.pyplot as plt
    from snownlp import SnowNLP

    with open('content.txt', 'r', encoding='UTF-8') as f:
        lines = f.readlines()

    sentimentslist = []
    for line in lines:
        if not line.strip():
            # Blank lines carry no comment text to score.
            continue
        # Read ``sentiments`` once and reuse it (the original accessed it
        # twice per line).
        score = SnowNLP(line).sentiments
        print(score)
        sentimentslist.append(score)

    plt.hist(sentimentslist, bins=sentiment_bins(), facecolor='g')
    plt.xlabel('Sentiments Probability')
    plt.ylabel('Quantity')
    plt.title('Analysis of Sentiments')
    plt.show()


if __name__ == "__main__":
    main()

效果图(情感各分数段出现频率)

SnowNLP情感分析是基于情感词典实现的,它简单地将文本分为两类:积极和消极,返回值为积极情绪的概率,也就是情感评分在[0,1]之间:越接近1,情感表现越积极;越接近0,情感表现越消极。

总结

以上就是如何获取腾讯视频评论并整理后进行可视化分析,属于数据分析及可视化的基本操作,没有特别高深的技术,比较适合数据分析初学者练习,读者可以自行动手尝试。全部代码已在文中给出。

滨州哪个医院治疗白癜风最好
武汉白癜风治疗费用
百色治疗白癜风的医院

上一篇: 股价跌跌不休,业绩难逃亏损,重整完毕的*ST会集往何处去?

下一篇: 毛里求斯中国文化中心启动2022“东北冰雪旅游海外推广季”

友情链接