Mooc视频下载器

原文地址:金君

下载链接:64位机 32位机

使用方式:

下载与你的系统对应的 exe 并直接运行,然后输入要下载的课程名称或课程的网址链接。

接着输入搜索结果中课程名称前面对应的编号(id),随后程序会提示选择视频清晰度。

最后将会在exe同一级目录下生成文件夹。

附代码:

'''
慕课视频下载器
'''
import json
import os
import re
import sys
from socket import timeout
from time import perf_counter, sleep
from urllib import parse
from urllib import request
from urllib.error import ContentTooShortError, URLError, HTTPError

try:
    from time import clock  # removed in Python 3.8; kept for older interpreters
except ImportError:
    clock = perf_counter
# Author metadata shown in the banner of the text UI.
author = "Dreams孤独患者"
email = "1079339655@qq.com"
# File name of the Alipay QR-code image that is opened when the program exits.
CODE = 'Mooc.jpg'
# Directory containing this script; downloaded courses are stored below it.
# (`__file__` had lost its underscores, which made this line a NameError.)
PATH = os.path.dirname(os.path.abspath(__file__))
# Timeout, in seconds, applied to every HTTP request.
TIMEOUT = 20
# Characters that are not allowed in Windows file names (plus CR/LF/TAB).
winre = re.compile(r'[?*|<>:"/\\\r\n\t]')

# State shared between `downlaod_file` and the `schedule` progress callback.
start_time = perf_counter()  # moment of the last speed measurement
start_size = 0               # bytes downloaded at the last speed measurement
speed = 0                    # most recent average speed in MiB/s


class Mooc:
    """Search icourse163.org courses and download their videos and slides."""

    def __init__(self):
        # Third-party service that resolves a course URL into resource links.
        self.mooc_url = 'http://tools.antlm.com/index.php'
        # DWR endpoint used by icourse163.org for course search.
        self.search_url = 'https://www.icourse163.org/dwr/call/plaincall/MocSearchBean.searchMocCourse.dwr'
        self.course_data = {'a': 'ajax_query', 'course_url': None}
        # Form fields of the DWR search call; 'c0-e1' receives the keyword.
        self.search_data = {
            'callCount': '1',
            'scriptSessionId': '${scriptSessionId}190',
            'c0-scriptName': 'MocSearchBean',
            'c0-methodName': 'searchMocCourse',
            'c0-id': '0',
            'c0-e1': None,
            'c0-e2': 'number:1',
            'c0-e3': 'boolean:true',
            'c0-e4': 'null:null',
            'c0-e5': 'number:0',
            'c0-e6': 'number:30',
            'c0-e7': 'number:20',
            'c0-param0': (
                'Object_Object:{ keyword:reference:c0-e1, '
                'pageIndex:reference:c0-e2, '
                'highlight:reference:c0-e3, '
                'categoryId:reference:c0-e4, '
                'orderBy:reference:c0-e5, '
                'stats:reference:c0-e6, '
                'pageSize:reference:c0-e7 }'
            ),
            'batchId': '1543633161622',
        }
        self.title = None    # file-system-safe title of the selected course
        self.content = None  # course JSON data holding video and slide links
        self.num = 0         # running video number inside the current lesson

    @staticmethod
    def _parse_search_result(text):
        """Extract a list of {'name', 'school', 'url'} dicts from a DWR reply."""
        def clean(raw):
            # Drop the highlight markers, then decode the \uXXXX escapes.
            stripped = re.sub(r'[{}# ]', '', raw)
            return stripped.encode('utf8').decode('unicode_escape')

        names = [clean(s) for s in re.findall(r'highlightName="(.*?)"', text)]
        schools = [clean(s)
                   for s in re.findall(r'highlightUniversity="(.*?)"', text)]
        urls = ["https://www.icourse163.org/course/WHUT-" + course_id
                for course_id in re.findall(r'courseId=(\d+);', text)]
        return [{'name': na, 'school': sch, 'url': url}
                for na, sch, url in zip(names, schools, urls)]

    def searchMooc(self, courseName):
        """Search courses by name.

        Returns a list of dicts with the keys 'name', 'school' and 'url'.
        Network errors raised by urllib propagate to the caller.
        """
        self.search_data['c0-e1'] = courseName
        data = parse.urlencode(self.search_data).encode('utf8')
        req = request.Request(url=self.search_url, data=data, method='POST')
        # Always close the response: the remote host refuses several
        # simultaneous connections.
        with request.urlopen(req, timeout=TIMEOUT) as response:
            text = response.read().decode('utf8')
        return self._parse_search_result(text)

    def getTitle(self, course_url):
        """Fetch the course page and return the content of its <title> tag.

        The title with illegal file-name characters removed is stored in
        `self.title`. Raises KeyError when the page has no <title> element.
        """
        with request.urlopen(course_url, timeout=TIMEOUT) as response:
            text = response.read().decode('utf8')
        begin = text.find('<title>')
        end = text.find('</title>')
        if begin == -1 or end == -1:
            # Without this check the slice below would silently return
            # almost the whole page as the "title".
            raise KeyError(course_url)
        title = text[begin + 7:end].strip()
        self.title = winre.sub('', title)
        return title

    def getContent(self, course_url):
        """Load the video/slide links of a course into `self.content`.

        Raises KeyError when the service reply has no 'data' entry.
        """
        self.course_data['course_url'] = course_url
        # urlencode keeps '?', '&' and '#' inside the course URL from being
        # interpreted as part of the outer query string.
        params = '?' + parse.urlencode(self.course_data)
        with request.urlopen(self.mooc_url + params,
                             timeout=TIMEOUT) as response:
            self.content = json.loads(response.read().decode('utf8'))['data']

    def getSize(self, course_url):
        """Return the size in MiB of the resource at `course_url`.

        The request is retried up to five times on URLError; afterwards an
        Exception is raised. A reply without Content-Length raises KeyError.
        """
        for _ in range(5):
            try:
                # The context manager only closes a response that was really
                # opened (the old `finally` failed when urlopen itself raised).
                with request.urlopen(course_url, timeout=TIMEOUT) as response:
                    length = response.headers.get('Content-Length')
                if length is None:
                    raise KeyError('Content-Length')
                return float(length) / (1024 * 1024)
            except URLError:
                sleep(1)
        raise Exception("网络异常")

    def download_lesson(self, source, dirname):
        """Download the video and the slides of one unit into `dirname`.

        `source` is the unit's dict; it may hold 'shdUrl'/'hdUrl' (video),
        'project_url' (slides) and 'unit_title'.
        """
        unit_title = winre.sub('', source.get('unit_title', 'None'))
        # Prefer the super-high-definition video, fall back to high definition.
        video_url = source.get('shdUrl', None)
        if video_url is None:
            video_url = source.get('hdUrl', None)
        pdf_url = source.get('project_url', None)

        if video_url:
            self.num += 1
            video_name = "(" + str(self.num) + ")--" + unit_title
            mooc_video = os.path.join(dirname, video_name) + '.mp4'
            if not os.path.exists(mooc_video):
                size = self.getSize(video_url)
                print(" |-{} 大小: {:.2f}M".format(align(video_name, 50), size))
                try:
                    downlaod_file(video_url, mooc_video, schedule)
                except KeyboardInterrupt:
                    # The user aborted: remove the incomplete file.
                    if os.path.exists(mooc_video):
                        os.remove(mooc_video)
                    raise
            else:
                print(" |-{} 已经成功下载!".format(align(video_name, 50)))

        if pdf_url:
            mooc_pdf = os.path.join(dirname, unit_title) + '.pdf'
            if not os.path.exists(mooc_pdf):
                try:
                    downlaod_file(pdf_url, mooc_pdf)
                except KeyboardInterrupt:
                    if os.path.exists(mooc_pdf):
                        os.remove(mooc_pdf)
                    raise

    def download(self):
        """Download every chapter/lesson/unit listed in `self.content`.

        Returns True when everything was downloaded and False when the user
        interrupted the download or any other error occurred.
        """
        print('\n{:^60s}'.format(self.title))
        rootDir = os.path.join(PATH, self.title)
        os.makedirs(rootDir, exist_ok=True)
        try:
            for chapter in self.content:
                # Remove characters that are illegal in Windows folder names.
                chapter_title = winre.sub('', chapter['chapter_title'].strip())
                chapterDir = os.path.join(rootDir, chapter_title)
                os.makedirs(chapterDir, exist_ok=True)
                print(chapter_title)
                for unit in chapter:
                    if unit == 'chapter_title':
                        continue
                    lesson_title = winre.sub('', chapter[unit]['lesson_title'])
                    lessonDir = os.path.join(chapterDir, lesson_title)
                    os.makedirs(lessonDir, exist_ok=True)
                    print(" " + lesson_title)
                    self.num = 0  # video numbering restarts in every lesson
                    for lesson in chapter[unit]:
                        if lesson == 'lesson_title':
                            continue
                        self.download_lesson(chapter[unit][lesson], lessonDir)
            return True
        except KeyboardInterrupt:
            # Interrupted by the user.
            print()
            return False
        except Exception:
            # Anything else is reported to the user as a network problem.
            print("\n请检查网络状态是否良好…")
            return False


def schedule(a, b, c):
    """Progress callback for `request.urlretrieve`.

    a: number of blocks transferred so far
    b: block size in bytes
    c: total size of the remote file in bytes
    """
    global start_time, start_size, speed
    if c <= 0:
        # Total size unknown or zero: no percentage can be computed.
        return
    length = 66  # width of the progress bar in characters
    done = a * b
    sch = min(100 * done / c, 100)
    per = min(length * done // c, length)
    if a % 5 == 0 or per == length:
        print('\r |-[' + per * '*' + (length - per) * '.'
              + '] {:.2f}% {:.2f}M/s'.format(sch, speed),
              end=' (ctrl+c中断)')
        now = perf_counter()
        elapsed = now - start_time
        if elapsed > 0.5:
            # Refresh the average speed at most every half second.
            speed = (done - start_size) / (elapsed * 1024 * 1024)
            start_size = done
            start_time = now
        if per == length:
            print()


def align(string, width):
    """Pad or truncate `string` to `width` display columns.

    Non-ASCII characters (e.g. Chinese) count as two columns. Truncation
    happens as soon as fewer than three columns would remain.
    """
    kept = []
    size = 0
    for ch in string:
        if size + 3 > width:
            break
        size += 1 if ord(ch) <= 127 else 2
        kept.append(ch)
    return ''.join(kept) + (width - size) * ' '


def downlaod_file(url, filename, backfunc=None):
    """Download `url` to `filename`, retrying on unstable connections.

    `backfunc` is an optional urlretrieve report hook. After five failed
    attempts an Exception is raised.
    """
    global start_time, start_size, speed
    for _ in range(5):
        try:
            # Reset the time, size and speed used by the progress callback.
            start_time = perf_counter()
            speed = start_size = 0
            request.urlretrieve(url, filename, backfunc)
            return
        except (ContentTooShortError, URLError, ConnectionResetError, timeout):
            if os.path.exists(filename):
                os.remove(filename)  # remove the incomplete file
            sleep(1)
        finally:
            request.urlcleanup()
            sleep(0.1)
    raise Exception("网络不可用")


def get_SourceFile(filename):
    """Return the path of a bundled resource file (the QR-code image).

    In a frozen (PyInstaller) executable the resources live in the
    temporary directory `sys._MEIPASS`; otherwise next to the script.
    """
    if getattr(sys, 'frozen', False):
        file_path = sys._MEIPASS
    else:
        file_path = PATH
    return os.path.join(file_path, filename)


def _print_banner():
    """Clear the console and print the program banner."""
    os.system("cls" if os.name == "nt" else "clear")
    print("\t" + "=" * 80)
    print('\t|\t\t\t中国大学视频下载器 \t\t作者: {:^16s} |'.format(author))
    print("\t|\t\t\twww.icourse163.org\t\t邮箱: {:^16s} |".format(email))
    print("\t" + "=" * 80)


def _ask_course_number(courses):
    """List `courses` and return the number chosen by the user (0 = cancel)."""
    print("编号\t课程名称\t\t\t开课单位\t\t网址链接")
    for cnt, course in enumerate(courses, 1):
        print(align(str(cnt), 5), align(course['name'], 32),
              align(course['school'], 18), course['url'])
    while True:
        try:
            order = int(input("输入一个要下载的课程编号(0退出): "))
            if 0 <= order <= len(courses):
                return order
        except ValueError:
            pass
        print("课程编号必须是一个0-{}的数字".format(len(courses)))


def UI_interface(mooc):
    """Run the interactive console loop using the `Mooc` instance `mooc`."""
    try:
        while True:
            _print_banner()
            keystr = input('\n输入一个视频课程网址或者一个课程名(q退出): ')
            if keystr == 'q':
                break
            # A course URL is used directly (without its '#/...' fragment);
            # anything else is treated as a search keyword.
            match = re.search(r'(www\.icourse163\.org/.*?)(#/.*)?$', keystr)
            if match:
                course_url = "https://" + match.group(1)
            else:
                print("正在搜索课程……")
                try:
                    courses = mooc.searchMooc(keystr)
                except (URLError, ConnectionResetError, timeout):
                    input("请检查网络后继续…")
                    continue
                if not courses:
                    print('很抱歉,未搜索到课程 "{}" !'.format(keystr))
                    input("请继续…")
                    continue
                order = _ask_course_number(courses)
                if order == 0:
                    continue
                course_url = courses[order - 1]['url']

            print("正在连接资源……")
            try:
                title = mooc.getTitle(course_url)
                mooc.getContent(course_url)
            except (ConnectionResetError, timeout, HTTPError):
                input("该网址不存在!\n请继续…")
                continue
            except URLError:
                input("请检查网络后继续…")
                continue
            except KeyError:
                print('很抱歉,无法获取 "{}" 对应的课程资源!'.format(course_url))
                input("请继续…")
                continue

            isdownload = mooc.download()
            while not isdownload:
                # Ask whether to resume until a clear y/n answer is given.
                redown = None
                while redown not in ('y', 'n'):
                    try:
                        redown = input("是否继续[y/n]: ")
                    except KeyboardInterrupt:
                        print()
                if redown == 'n':
                    break
                isdownload = mooc.download()
            if isdownload:
                print('"{}"成功下载完毕!'.format(title))
            input("请按任意键返回主界面…")
    except KeyboardInterrupt:
        print()
    finally:
        if (input("\n小哥哥,小姐姐,扫码领红包 …(⊙⊙)… [y/n]: ") != 'n'):
            alipy = get_SourceFile(CODE)
            if hasattr(os, 'startfile'):
                os.startfile(alipy)
            else:
                # os.startfile only exists on Windows; show the path instead.
                print(alipy)
def main():
    """Program entry point: run the console UI until the user quits."""
    mooc = Mooc()
    try:
        UI_interface(mooc)
    except (KeyboardInterrupt, EOFError):
        # Ctrl+C / end of input while answering the final prompt simply
        # ends the program.
        pass
    if os.name == "nt":
        # `pause` is a Windows shell command; it keeps the console window
        # open when the exe was started by double-click.
        os.system("pause")
# Run the downloader only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
1 评论
留言