用python下载vip的电影

正文:

一.在线解析vip视频,可直接在线观看

http://www.wmxz.wang/video.php?url=付费电影网址
http://www.vipjiexi.com/tong.php?url=付费电影网址
http://www.wq114.org

二.下载视频

爬取:
http://cuijiahua.com/blog/2017/10/spider_tutorial_1.html   (可重点看)
下载ts:
https://blog.csdn.net/lingluofengzang/article/details/78159981
https://blog.csdn.net/lingdongtianxia/article/details/82886925
http://www.cnblogs.com/chenlove/p/9518612.html   (ts的多线程下载,可重点看)
合并ts文件:
https://www.cnblogs.com/chenlove/p/9519172.html
ts和m3u8的格式:
https://blog.csdn.net/a33445621/article/details/80377424
https://blog.csdn.net/baidu_34418350/article/details/64922512
 

三.附上博主的代码:

pyinstaller -F vip.py  生成exe文件
博主曾尝试把多线程换成多进程,但那样会触发网站的反爬虫机制,所以仍然使用线程;线程的一点麻烦是必须等所有线程都下载完成后,才能进行合并文件的操作

#coding=utf-8
import requests
from bs4 import BeautifulSoup
import threading
import re
import json
import codecs
import time
from lxml import html
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import random
import os
# Disabled draft of a class-based downloader built around the api.xfsub.com
# endpoints. It is wrapped in a bare triple-quoted string, so Python only
# evaluates a string expression here and the class is never defined.
# NOTE(review): get_key passes the regex match (a str) to json.load, which
# expects a file-like object; json.loads would be needed if this is revived.
'''
class video_downloader():
	def __init__(self,url):
		self.server="http://api.xfsub.com"
		self.api="http://api.xfsub.com/xfsub_api/?url="
		self.get_url_api="http://api.xfsub.com/xfsub_api/url.php"
		self.url=url.split('#')[0]
		self.target=self.api+self.url
		self.s=requests.session()
	def get_key(self):
		req=self.s.get(url=self.target)
		req.encoding='utf-8'
		self.info=json.load(re.findall('"url.php",\ (.*),',req.text)[0])
'''
# Remember when the run started so the total duration can be reported at the end.
start_time = time.time()
print(len(sys.argv))

# Supported invocations (values are read by position; the flag names themselves
# are not checked):
#   python vip.py -u URL                      -> 3 entries in sys.argv
#   python vip.py -u URL -thread N -c DIR     -> 7 entries in sys.argv
# Any other argument count prints the usage text and exits. Exiting whenever
# the count differs from 3 would make the 7-argument form unusable.
if len(sys.argv) not in (3, 7):
	print("thread default is 50,catalog default is ts")
	print("example:")
	print("python vip.py -u http://www.baidu.com -thread 50 -c ts")
	print("python vip.py -u http://www.baidu.com")
	sys.exit()

# Defaults, overridden below by the 7-argument form.
num_thread = 50
dir_name = "ts"
if len(sys.argv) == 7:
	# Keep the default when the thread count is not a positive integer, and
	# store it as an int because it is used in arithmetic and range() later.
	if sys.argv[4].isdigit() and int(sys.argv[4]) > 0:
		num_thread = int(sys.argv[4])
	dir_name = sys.argv[6]

# Create the directory that receives the downloaded segments if it is missing.
if not os.path.isdir(dir_name):
	os.mkdir(dir_name)

# Page URL of the video to download, e.g. https://www.iqiyi.com/v_19rrf2nw1g.html
url = sys.argv[2]
def Handler(start, end, filename):
	"""Download the segments filename[start:end] into the dir_name directory.

	Used as a thread target. Each entry of ``filename`` is a segment path taken
	from the playlist: it is appended to the CDN host to form the download URL,
	and its sixth "/"-separated component becomes the local file name.

	A segment that fails (network error, HTTP error status, unexpected path
	shape, write error) is reported on stdout and skipped, so one bad segment
	does not stop the rest of the range.

	:param start: index of the first entry to download (inclusive).
	:param end: index one past the last entry to download.
	:param filename: list of segment paths.
	"""
	for i in filename[start:end]:
		segment = str(i).replace("\n", "")
		try:
			name = segment.split('/')[5]
			# The timeout keeps a stalled connection from blocking this thread
			# (and therefore the final join) forever.
			r = requests.get("https://cdn.letv-cdn.com/" + segment, stream=True, timeout=30)
			# Do not store an HTTP error page as if it were video data.
			r.raise_for_status()
			with codecs.open(dir_name + "/" + name, "wb") as code:
				code.write(r.content)
		except Exception as e:
			# Best effort: report the failure instead of hiding it, then go on.
			print("download failed, skipped: %s (%s)" % (segment, e))
			continue
		print(name)
# Every request below is sent with a "Connection: close" header.
headers = {'Connection': 'close'}

# Stage 1: load the resolver page for the requested video URL.
api = "http://www.wq114.org/yun.php?url=" + url
print(api)
req = requests.get(url=api, headers=headers)
soup = BeautifulSoup(req.content, 'lxml')

# Stage 2: follow the first <iframe> of that page; its src is appended to the
# site root to form the next address.
url_2 = soup("iframe")[0].attrs['src']
api_2 = "http://www.wq114.org" + url_2
print(api_2)
rep_2 = requests.get(url=api_2, headers=headers)
soup_2 = BeautifulSoup(rep_2.content, 'lxml')

# Stage 3: take a value out of the sixth <script> element purely by position:
# the 8th ":"-separated piece, cut at the first ",", with blanks and single
# quotes stripped from both ends.
# NOTE(review): presumably this is the player's url field - confirm against the page.
script_tags = list(soup_2("script"))
s = str(script_tags[5])
url = s.split(':')[7].split(',')[0].strip(" ").strip("'")
print(url)

# Stage 4: post that value to the site's API. The 'url' field of the JSON reply
# has its "%3A%2F%2F" and "%2F" sequences turned back into "://" and "/".
get_data = {'up': 0, 'url': url}
res = requests.post(url="http://www.wq114.org/x2/api.php", headers=headers, data=get_data)
print(res.content)
response = json.loads(res.content)['url']
response = response.replace("%3A%2F%2F", "://").replace("%2F", "/")
print(response)

# Stage 5: the third line of the reply at that address is used as a path on the
# CDN host, and the document fetched from there is split into lines.
res = requests.post(url=response, headers=headers)
res2 = res.content.split('\n')[2]
print(res2)
next_api = "https://cdn.letv-cdn.com/" + res2
res = requests.get(url=next_api, headers=headers)
s = res.content.split('\n')
print(type(s))

# Keep only the lines that end in ".ts"; these are the segment paths to download.
s_list = []
for line in s:
	if not str(line).endswith(".ts"):
		continue
	print(line)
	s_list.append(line)
file_size = len(s_list)
print(file_size)
# Use the thread count chosen during argument parsing rather than resetting it
# to 50 here. int() covers a value that is still a string from sys.argv. Never
# start more threads than there are segments, and always start at least one.
num_thread = max(1, min(int(num_thread), file_size))
part = file_size // num_thread
print(part)
Thread_list = []
for i in range(num_thread):
	# Balanced boundaries: the ranges are contiguous, cover every index exactly
	# once, and differ in size by at most one segment, so the remainder is
	# spread over the threads instead of all landing on the last one.
	start = i * file_size // num_thread
	end = (i + 1) * file_size // num_thread
	t = threading.Thread(target=Handler, kwargs={'start': start, 'end': end, 'filename': s_list})
	t.daemon = True
	t.start()
	Thread_list.append(t)
# Wait for every download thread: the merge step that follows must not start
# before all segments are on disk.
for t in Thread_list:
	t.join()
# Part 2: merge the downloaded .ts segments into a single video file.
os.chdir(dir_name)
# Only .ts files directly inside the download directory are merged, so a
# last.mp4 left over from an earlier run is not appended to the new video.
files = [f for f in os.listdir(".") if f.lower().endswith(".ts") and os.path.isfile(f)]
# Shorter names first, then alphabetical order among names of equal length.
# Assumes segment names differ only in a trailing counter that is not
# zero-padded (...999.ts before ...1000.ts) - TODO confirm for other sources.
files.sort(key=lambda f: (len(f), f))
if files:
	# Concatenate the segments byte for byte in playback order. Doing this in
	# Python instead of assembling shell "copy /b" commands includes every
	# segment, has no command-line length limit, keeps file names that came
	# from the remote playlist out of a shell command, and is not tied to
	# Windows.
	with open("last.mp4", "wb") as merged:
		for name in files:
			with open(name, "rb") as segment:
				while True:
					chunk = segment.read(1024 * 1024)
					if not chunk:
						break
					merged.write(chunk)
	print("merged %d segments into last.mp4" % len(files))
	# Delete the segment files only after the merged file has been written.
	for name in files:
		os.remove(name)
else:
	print("no .ts segments found in %s, nothing to merge" % dir_name)
# Disabled experiment, kept as a bare string so it never runs: it posts to the
# site's tong.php endpoint with a fixed m3u8 address and prints the reply.
'''
next_api="http://www.wq114.org/x2/tong.php?url=https://cdn.letv-cdn.com/20181101/SXCSesZ8/index.m3u8"
s=requests.session()
req=s.post(url=next_api)
print req.content
'''
# Report how long the whole run took (seconds since start_time) and tell the
# user that the video is ready.
end_time = time.time()
last_time = end_time - start_time
print("最终耗时:%s" % (last_time,))
print("下载完成,请观看。")

 
调试笔记:

#coding=utf-8
import os
#a="FJ0La58068"+str(i)+".ts"
# Scratch list for generated segment names; it stays empty in this run.
tmp = []
# Collect the file names found under ./ts. If the walk visits more than one
# directory, the names of the last directory visited are what remain in files.
for _root, _dirs, names in os.walk("./ts"):
	files = list(names)
#print files.sort
# Experiment 1: strings compare character by character, so the name with
# counter 1234 sorts before the one with counter 222 and this prints "no".
a = "FJ0La580681234.ts"
b = "FJ0La580682235.ts"
c = "FJ0La58068222.ts"
d = "FJ0La58068123.ts"
if a > c:
	print("yes")
else:
	print("no")

# Experiment 2: put the names that are longer than the first one behind the
# others, sorting each group on its own.
a = ["FJ0La58068234.ts", "FJ0La580682235.ts", "FJ0La58068122.ts", "FJ0La58068123.ts"]
print(len(a))
print(len(a[0]))
# Build both groups in one pass each over the unchanged list. Removing items
# from a list while indexing into it skips the element that slides into the
# freed slot, and stopping at len(a)-1 would never look at the last element.
base_len = len(a[0])
b = [name for name in a if len(name) > base_len]
a = [name for name in a if len(name) <= base_len]
a.sort()
b.sort()
print(a + b)
# Merge test: join the .ts files collected from ./ts, in playback order, into
# ts/abc.mp4. The segment files themselves are kept.
print(len(files))
print(type(files))
if files:
	print(files[0])
# Ignore anything that is not a segment (for example an earlier abc.mp4).
files = [f for f in files if f.lower().endswith(".ts")]
# Shorter names first, then alphabetical order among names of equal length.
# Assumes the names differ only in a trailing counter that is not zero-padded
# (...999.ts before ...1000.ts) - TODO confirm for other sources.
files.sort(key=lambda f: (len(f), f))
os.chdir("ts/")
if files:
	# Concatenate byte for byte in Python rather than through grouped
	# "copy /b" shell commands: every segment is included, no intermediate
	# 0.ts-5.ts files are left behind, and it is not tied to Windows.
	with open("abc.mp4", "wb") as merged:
		for name in files:
			with open(name, "rb") as segment:
				while True:
					chunk = segment.read(1024 * 1024)
					if not chunk:
						break
					merged.write(chunk)
	print("merged %d segments into abc.mp4" % len(files))
else:
	print("no .ts segments found, nothing to merge")
# Disabled scratch code (bare string, never executed): fills tmp with segment
# names whose counter is zero-padded to 3 digits for 0-999 and to 4 digits for
# 1000-1038.
'''print tmp
print "no"
for i in range(0000,1000):
	tmp.append("FJ0La58068"+str(i).zfill(3)+".ts")
for i in range(1000,1039):
	tmp.append("FJ0La58068"+str(i).zfill(4)+".ts")
#print tmp
'''
# Disabled scratch code (bare string, never executed): merges the first 450
# names of tmp into a.ts with a single "copy /b" command.
'''
shell_str = '+'.join(tmp[0:450])
shell_str = 'copy /b /y '+ shell_str + ' a.ts'
os.system(shell_str)
print(shell_str)
'''
# Deleting the segment files is left switched off in this debugging script.
#os.system("del /Q *.ts")
print "下载完成,请观看。"

 
 
2018.11.6

发表评论

电子邮件地址不会被公开。 必填项已用*标注