diff --git a/.gitattributes b/.gitattributes new file mode 100755 index 0000000..412eeda --- /dev/null +++ b/.gitattributes @@ -0,0 +1,22 @@ +# Auto detect text files and perform LF normalization +* text=auto + +# Custom for Visual Studio +*.cs diff=csharp +*.sln merge=union +*.csproj merge=union +*.vbproj merge=union +*.fsproj merge=union +*.dbproj merge=union + +# Standard to msysgit +*.doc diff=astextplain +*.DOC diff=astextplain +*.docx diff=astextplain +*.DOCX diff=astextplain +*.dot diff=astextplain +*.DOT diff=astextplain +*.pdf diff=astextplain +*.PDF diff=astextplain +*.rtf diff=astextplain +*.RTF diff=astextplain diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..b9d6bd9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,215 @@ +################# +## Eclipse +################# + +*.pydevproject +.project +.metadata +bin/ +tmp/ +*.tmp +*.bak +*.swp +*~.nib +local.properties +.classpath +.settings/ +.loadpath + +# External tool builders +.externalToolBuilders/ + +# Locally stored "Eclipse launch configurations" +*.launch + +# CDT-specific +.cproject + +# PDT-specific +.buildpath + + +################# +## Visual Studio +################# + +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. + +# User-specific files +*.suo +*.user +*.sln.docstates + +# Build results + +[Dd]ebug/ +[Rr]elease/ +x64/ +build/ +[Bb]in/ +[Oo]bj/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +*_i.c +*_p.c +*.ilk +*.meta +*.obj +*.pch +*.pdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.log +*.scc + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opensdf +*.sdf +*.cachefile + +# Visual Studio profiler +*.psess +*.vsp +*.vspx + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# NCrunch +*.ncrunch* +.*crunch*.local.xml + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.Publish.xml +*.pubxml + +# NuGet Packages Directory +## TODO: If you have NuGet Package Restore enabled, uncomment the next line +#packages/ + +# Windows Azure Build Output +csx +*.build.csdef + +# Windows Store app package directory +AppPackages/ + +# Others +sql/ +*.Cache +ClientBin/ +[Ss]tyle[Cc]op.* +~$* +*~ +*.dbmdl +*.[Pp]ublish.xml +*.pfx +*.publishsettings + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file to a newer +# Visual Studio version. 
Backup files are not needed, because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm + +# SQL Server files +App_Data/*.mdf +App_Data/*.ldf + +############# +## Windows detritus +############# + +# Windows image file caches +Thumbs.db +ehthumbs.db + +# Folder config file +Desktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Mac crap +.DS_Store + + +############# +## Python +############# + +*.py[co] + +# Packages +*.egg +*.egg-info +dist/ +build/ +eggs/ +parts/ +var/ +sdist/ +develop-eggs/ +.installed.cfg + +# Installer logs +pip-log.txt + +# Unit test / coverage reports +.coverage +.tox + +#Translations +*.mo + +#Mr Developer +.mr.developer.cfg diff --git "a/\346\272\220\344\273\243\347\240\201/.ZhihuEpub.py.swo" "b/\346\272\220\344\273\243\347\240\201/.ZhihuEpub.py.swo" new file mode 100644 index 0000000..3859444 Binary files /dev/null and "b/\346\272\220\344\273\243\347\240\201/.ZhihuEpub.py.swo" differ diff --git "a/\346\272\220\344\273\243\347\240\201/MarkDownCssStyle.py" "b/\346\272\220\344\273\243\347\240\201/MarkDownCssStyle.py" new file mode 100755 index 0000000..30188d0 --- /dev/null +++ "b/\346\272\220\344\273\243\347\240\201/MarkDownCssStyle.py" @@ -0,0 +1,342 @@ +# -*- coding: utf-8 -*- + +def returnMarkDownCssStyle(): + return u""" + + + """ diff --git "a/\346\272\220\344\273\243\347\240\201/SoloQuestion.py" "b/\346\272\220\344\273\243\347\240\201/SoloQuestion.py" new file mode 100755 index 0000000..17f5642 --- /dev/null +++ "b/\346\272\220\344\273\243\347\240\201/SoloQuestion.py" @@ -0,0 +1,205 @@ +# -*- coding: utf-8 -*- +import urllib2 +import HTMLParser +import re +import zlib +import threading +import time +import datetime +import HTMLParser#HTML解码< +import json#在returnPostHeader中解析Post返回值 +import os#打开更新页面 + +import urllib#编码请求字串,用于处理验证码 +from ZhihuEpub import * +PostHeader = OldPostHeader() +f = open('ReadList.txt','r') + +for t in f: + + + + + + + + + + + + + + + + +def returnReDict():#返回编译好的正则字典#Pass + Dict = {} + Dict['_AgreeCount'] = re.compile(r'(?<=data-votecount=")\d*(?=">)') + Dict['_AnswerContent'] = #直接使用
进行提取,需要先移除所有img与nosprict标签,然后在解引用,原图片地址在新img标签里的data-original内 + Dict['_AnswerID'] = re.compile(r'(?<=)') + Dict['_Questionhref'] = re.compile(r'(?<=)') + Dict['_UpdateTime'] = re.compile(r'(?<=).*(?=)')#分为13:25、昨天 00:26、2013-05-07三种情况,需进一步处理 + Dict['_CommitCount'] = re.compile(r'(?<=).*?(?= )')#若转化为int失败则是添加评论#即为0条 + Dict['_ID'] = re.compile(r'(?<=)') + Dict['_UserIDLogoAdress'] = re.compile(r'(?<=src=")http://p\d\.zhimg\.com[/\w]{7}[_\w]{11}\.jpg(?=" class="zm-list-avatar)') + Dict['_UnSuccessName'] = re.compile(r'(?<=

).*(?=

)')#?存疑 + return Dict + +def ReadAnswer(ReDict,html_parser,text="",QuestionTitle=''):#UnitTest#newCommitTag + Dict={} + Dict["ID"] = "" ## + Dict["Sign"] = ""# + Dict["AgreeCount"] = 0# + Dict["CommitCount"] = 0# + Dict["QuestionID"] = ""# + Dict["AnswerID"] = ""# + Dict["UpdateTime"] = "1970-01-01"# + Dict["QuestionTitle"] = "" ## + Dict["Questionhref"] = "" # + Dict["AnswerContent"] = "" ## + Dict["UserName"] = "ErrorName" ## + Dict['UserIDLogoAdress']= '' # + if text=='': + return Dict + def Help_ReadAnswer(t="",flag=True): + u""" + #辅助系函数 + * 用于提取答案内容,提高代码复用度 + * 因为某些项匹配失败后应直接调用默认值,所以添加了Flag以做区分 + """ + try: + Dict[t] = ReDict['_'+t].search(text).group(0) + except AttributeError: + if flag: + print u"没有收集到" + t + print u"知乎页面结构已变动,程序无法正常运行,快上知乎@姚泽源喊他更新脚本" + return False + else : + pass + return True + + #特殊处理 + + try: + Dict["AnswerContent"] = html_parser.unescape(removeTag(returnTagContent(text=t,tagname='div',TrueTagName='
'),['img','noscript'])) + Dict["AnswerContent"] = removeAttibute(Dict["AnswerContent"],['src']).replace('data-original','src') + except AttributeError: + print u"答案内容没有收集到" + print u"知乎页面结构已变动,程序无法正常运行,快上知乎@姚泽源喊他更新脚本" + return Dict + + Dict["QuestionTitle"] = QuestionTitle + try : + ID = ReDict['_ID'].search(text).group(0) + _UserName = re.compile(r'(?<=).*?(?=)')# 这里必须要用到已经捕获的ID,否则没法获得用户名 + Dict["UserName"] = _UserName.search(text).group(0) + except AttributeError : + try :#对应于知乎用户与匿名用户两种情况 + Dict["UserName"] = ReDict['_UnSuccessName'].search(text).group(0) + ID = '404NotFound!' + except AttributeError: + Dict["UserName"] = u"知乎用户" + ID = 'ZhihuUser!' + + Dict["ID"] = ID + + #常规匹配 + #时间放到最后,因为要靠时间验证是否匹配成功 + for t in ["AgreeCount","QuestionID","AnswerID","UpdateTime","Questionhref"]: + if Help_ReadAnswer(t): + pass + else: + return Dict + + for t in ['UserIDLogoAdress','Sign','CommitCount']: + if Help_ReadAnswer(t,False): + pass + else: + return Dict + + Dict["Questionhref"] = 'http://www.zhihu.com'+Dict["Questionhref"] + + if len(Dict["UpdateTime"])!=10 : + if len(Dict["UpdateTime"])!=5 : + Dict["UpdateTime"] = time.strftime(u'%Y-%m-%d',time.localtime(time.time()-86400))#昨天 + else : + Dict["UpdateTime"] = time.strftime(u'%Y-%m-%d',time.localtime(time.time()))#今天 + + return Dict +def WorkForFetchUrl(url='',ReDict={},html_parser=None,AnswerDictList=[],ErrorList=[],IndexList=[]):#抓取回答链接#注意,Page是字符串#Pass) + try : + content = OpenUrl(urllib2.Request(headers=PostHeader,url=t.replace('\r','').replace('\n','')),Timeout=30).decode(encoding="utf-8",errors='ignore').replace('\r','').replace('\n','') + title = re.search(r'*?',content.replace('\r','').replace('\n','').replace(' ','')).group(0) + File = open(title+'.htm','w') + File.write(content) + File.close() + except IOError as e: + print e + ErrorList.append(url) + return + except ValueError as e : + print e + ErrorReportText(Info=u'404网页已删除\t:\t'+str(e)) + return + QuestionTitle = re.search('(?<=).*?(?=)',content).group(0) + for t in content.split('
0): + Front_ = (Front[:-1]+'∟'+Front[-1:]) + else: + Front_='' + print '||'+u'\t'+Front_+str(r).decode(encoding="utf-8",errors='ignore') ,':\t',str(Dict[r]).decode(encoding="utf-8",errors='ignore') +def PrintInOneLine(text=''):#Pass + try: + sys.stdout.write("\r"+" "*60+'\r') + sys.stdout.flush() + sys.stdout.write(text) + sys.stdout.flush() + except: + ErrorReportText(text) +def OpenUrl(Request,Timeout=5):#打开网页,只尝试一次,失败时返回空字符串,错误信息中包含未打开网址。话说字符串分割对空列表还有效否?#OKTag + try : + Content = urllib2.urlopen(Request,timeout=Timeout) + except urllib2.HTTPError as inst: + print inst + if int(inst.code/100) == 4: + print u'您所要找的网页在一片没有知识的荒原上' + raise ValueError(u"404 Not Found"+u"错误页面\t:\t"+Request.get_full_url())#此失败不可修复,通过报错直接跳过读取该页面 + else: + if int(inst.code/100)== 5: + print u"知乎正在紧张的撰写答案,服务器繁忙ing,稍后重试" + else : + print inst.code#未知错误 + print u'打开网页时出现未知错误' + except urllib2.URLError as inst : + print inst + print inst.reason#原因不详 + print u'错误网址:'+Request.get_full_url() + print u'打开网页异常#稍后重试' + except socket.timeout as e : + print e + print u"打开网页超时" + else: + if Content.info().get(u"Content-Encoding")=="gzip": + try: + k = zlib.decompress(Content.read(), 16+zlib.MAX_WBITS) + except zlib.error as ziperror: + print u'解压缩出错' + print u'错误信息:' + print zliberror + raise IOError(u"解压网页内容时出现错误"+u"错误页面\t:\t"+Request.get_full_url())#此失败不可修复,通过报错直接跳过读取该页面 + else : + k = Content.read() + #去除了编码为utf-8部分 + return k + return ''#失败则返回空字符串 + +def ErrorReturn(ErrorInfo=""):#返回错误信息并退出,错误信息要用unicode编码 + print ErrorInfo + print u"点按回车退出" + input() + os._exit(0) + +def setMaxThread(): + try: + MaxThread=int(raw_input()) + except ValueError as e : + print e + print u'貌似输入的不是数...最大线程数重置为20,点击回车继续运行' + MaxThread=20 + raw_input() + if MaxThread>200 or MaxThread<1: + if MaxThread>200: + print u"线程不要太大好伐\n你线程开的这么凶残你考虑过知乎服务器的感受嘛" + else: + print u"不要输负数啊我去" + print u"最大线程数重置为20" + MaxThread=20 + print u'猛击回车继续~' + raw_input() + return MaxThread + +def ThreadLiveDetect(ThreadList=[]): + LiveFlag = True + while LiveFlag:#等待线程执行完毕 + LiveFlag = False + Running = 0 + for t in ThreadList: + if t.isAlive(): + LiveFlag=True + Running+=1 + PrintInOneLine( u"目前还有{}条线程正在运行,等待所有线程执行完毕".format(Running)) + time.sleep(1) +def ErrorReportText(Info='',flag=True): + if flag : + f =open(u'未能成功打开的页面.txt','a') + else : + f =open(u'未能成功打开的页面.txt','w') + f.write(Info) + f.close() +def ChooseTarget(url=''):#选择#Pass + try : + ID = re.search(r'(?<=zhihu\.com/people/)[^/]*',url).group(0)#匹配ID + except AttributeError: + pass + else: + print u'成功匹配到知乎ID,ID=',ID + return 1,ID + try : + Collect = re.search(r'(?<=zhihu\.com/collection/)\d*',url).group(0)#匹配收藏 + except AttributeError: + pass + else: + print u'成功匹配到收藏夹,收藏夹代码=',Collect + return 2,Collect + try : + Roundtable= re.search(r'(?<=zhihu\.com/roundtable/)[^/]*',url).group(0)#知乎圆桌 + except AttributeError: + pass + else: + print u'成功匹配到知乎圆桌,圆桌名=',Roundtable + return 3,Roundtable + try : + Topic = re.search(r'(?<=zhihu\.com/topic/)\d*',url).group(0)#知乎话题 + except AttributeError: + pass + else: + print u'成功匹配到话题,话题代码=',Topic + return 4,Topic + return 0,"" + +def CopyFile(root='',TargetFile='',flag=True):#Pass + try : + if flag : + f = open(root,'r') + k = open(TargetFile,'w') + else: + k = open(TargetFile,'wb') + f = open(root,'rb') + k.write(f.read()) + except IOError as e: + print e + ErrorReportText(str(e)) + return +####ToolEnd#### + +def CheckImgFileExist(CheckList=[],ErrorList=[]):#PassTag + for url in CheckList: + MetaName = u'../知乎图片池/' + PixName(url) + if not os.path.isfile(MetaName): + 
ErrorList.append(url) + +def DownloadPicWithThread(ImgList=[],MaxThread=20):#添加图片池功能#当图片下载完成时在ImgList中删除之#newCommitTag + Time=0 + MaxPage = len(ImgList) + ErrorList = [] + while Time<10 and MaxPage>0: + Buf_ImgList = [] + Time+=1 + ThreadList = [] + for t in ImgList:#因为已下载过的文件不会重新下载,所以直接重复执行十遍,不必检测错误#待下载的文件可能会突破万这一量计,所以还是需要一些优化 + ThreadList.append(threading.Thread(target=DownloadImg,args=(t,Buf_ImgList,))) + for Page in range(MaxPage): + if threading.activeCount()-1 < MaxThread:#实际上是总线程数 + ThreadList[Page].start() + else : + PrintInOneLine(u'第({}/10)轮下载图片,线程库中还有{}条线程等待运行'.format(Time,MaxPage-Page)) + time.sleep(1) + ThreadLiveDetect(ThreadList) + + ImgList = list(set(ImgList)-set(Buf_ImgList))#剔除不能下载的图片链接地址 + ErrorList += Buf_ImgList#将下载失败的图片列表储存起来,一并输出 + Buf_ImgList = [] + CheckImgFileExist(CheckList=ImgList,ErrorList=Buf_ImgList) + ImgList = Buf_ImgList + + MaxPage = len(ImgList) + if MaxPage !=0: + print u'第{}轮下载执行完毕,剩余{}张图片待下载,若下载失败的图片过多可以调用迅雷进行下载,待下载图片列表为『程序所在文件夹\电子书制作临时资源库\待下载图片列表.txt』,将迅雷下载下来的图片放置于『程序所在文件夹\电子书制作临时资源库\知乎图片池』中即可'.format(Time,MaxPage) + time.sleep(1)#休息一秒后继续 + else : + print u'\n所有图片下载完毕' + ErrorList = list(set(ErrorList)) + if len(ErrorList)>0: + print u'开始输出下载失败的图片列表' + f = open(u'../下载失败的图片列表.txt','a')#输出下载失败列表 + f.write(u'\n-------------------------------------------\n') + f.write(u'时间戳:\t'+time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())+'\n') + f.write(u'-------------------------------------------\n') + print u'以下文件下载失败' + for t in ErrorList: + f.write(t+'\n') + print t + f.close() +def returnCursor():#PassTag + if os.path.isfile('./ZhihuDateBase.db'): + conn = sqlite3.connect("./ZhihuDateBase.db") + conn.text_factory = str + cursor = conn.cursor() + return cursor + else: + ErrorReturn(u'抱歉,没有找到数据库,请先运行知乎助手') + return None +def Mkdir(DirName=u''):#PassTag + if DirName=='': + return + else: + try : + os.mkdir(DirName) + except OSError: + pass#已存在 + return +def CreateMimeType():#PassTag + f = open('mimetype','w') + f.write('application/epub+zip') + f.close() +def CreateContainer_XML():#PassTag + f = open('META-INF/container.xml','w') + f.write(''' + + + + +''') + f.close() +def returnTagContent(text='',tagname='',TrueTagName=''):#NonUseTag#返回时会带上标签 + TagBeginStr = TrueTagName + BeginPos = text.index(TagBeginStr)+len(TagBeginStr) + rowEndPos = text.index('') + newText = text[BeginPos:rowEndPos]#初始字符位置 + #开始检测是否有重复标签 + completeTime = len(re.findall(r"<"+tagname+r'.*?>',newText)) + while completeTime: + bufPos = rowEndPos + for i in range(completeTime): + bufPos = text.index('',bufPos+1) + newText = text[rowEndPos:bufPos] + completeTime = len(re.findall(r"<"+tagname+r'.*?>',newText)) + rowEndPos = bufPos + return text[BeginPos-len(TagBeginStr):rowEndPos+len(tagname)+3] +def removeTagContentWithTag(text='',TagList=[]):#移除List中所有的Tag + for name,truename in TagList: + try : + text = text.replace(returnTagContent(text=text,tagname=name,TrueTagName=tagname),'') + except : + pass + return text + +def removeTag(text='',tagname=[]):#NonUseTag + text = text.replace('','') + text = re.sub(r"<"+tagname+r'.*?>','',text) + return text +def removeAttibute(text='',AttList=[]):#PassTag + for Att in AttList: + for t in re.findall(r'\s'+Att+'[^\s^>]*',text): + text = text.replace(t,'') + return text +def closeimg(text='',ImgList=[]):#PassTag#若有大图直接下载之#为图片添加点击框 + for t in re.findall(r'',text): + try : + t.index('data-original') + except : + text = text.replace(t,fixPic(removeAttibute(t,['data-rawwidth','data-original']).replace("data-rawheight",'height')[:-1]+u' 
alt="知乎图片"/>',ImgList)) + else: + text = text.replace(t,fixPic(removeAttibute(t,['data-rawwidth','data-original']).replace("data-rawheight",'height')[:-1]+u' alt="知乎图片"/>',ImgList))#使用小图,用原图可以将data-original替换为src + return text +def PixName(t):#PassTag + return re.search(r'[^/"]*?\.jpg',t).group(0) +def fixPic(t='',ImgList=[]):#PassTag#添加多看扩展 + for k in re.findall(r'(?<=src=")http://[/\w\.^"]*?zhimg.com[/\w^"]*?.jpg',t): + t = t.replace(k,'../images/'+PixName(k)) + ImgList.append(k) + return '
'+t+'
' +def DownloadImg(imghref='',ErrorList=[]):#下载失败时应报错或重试#文件已成功下载时也添加到ErrorList中#newCommitTag + try : + CheckName = u'../知乎图片池/' + try : + MetaName = PixName(imghref) + except AttributeError:#需要编写测试用例 + raise ValueError(u'程序出现错误,未能成功提取出图片下载地址'+u'目标网址'+imghref) + imgfilename = './OEBPS/images/'+MetaName + if not os.path.isfile(CheckName+MetaName): + k = OpenUrl(urllib2.Request(imghref),Timeout=20)#这里会返回IOError + if len(k)==0: + print u'Download image '+MetaName+' error ,will try again soon' + return 0 + imgfile = open(CheckName+MetaName,"wb") + imgfile.write(k) + imgfile.close() + if not os.path.isfile(imgfilename): + imgfile = open(imgfilename,"wb") + imgpoolfile = open(CheckName+MetaName,"rb") + imgfile.write(imgpoolfile.read()) + imgfile.close() + imgpoolfile.close() + except ValueError as e : + print e + ErrorList.append(imghref) + PrintInOneLine( u'图片{}下载失败\r'.format(MetaName)) + ErrorReportText(u'图片下载错误\t:\t'+str(e)) + except IOError as e : + print e + ErrorList.append(imghref) + PrintInOneLine( u'图片{}下载失败\r'.format(MetaName)) + ErrorReportText(u'图片下载错误\t:\t'+str(e)) + else : + PrintInOneLine( u'图片{}下载成功\r'.format(MetaName)) + return 0 +def CreateOPF(OPFInfoDict={},Mainfest='',Spine=''):#生成文件函数均假定当前目录为电子书根目录#PassTag + f = open('./OEBPS/content.opf','w') + XML = u''' + + + %(BookTitle)s + %(AuthorAddress)s + zh-CN + %(AuthorName)s + %(Description)s + 本电子书由ZhihuHelper制作生成,仅供个人阅读学习,严禁用于商业用途 + 知乎 + + + + + + + + '''%OPFInfoDict + Mainfest+ ''' + + + + + + + + '''+Spine+''' + + + + + + + ''' + f.write(XML) + f.close() +def CreateNCX(NCXInfoDict={},Ncx=''):#PassTag + f = open('./OEBPS/toc.ncx','w') + XML = ''' + + + + + + + + + + %(BookTitle)s + '''%NCXInfoDict+Ncx+''' + + ''' + f.write(XML) + f.close() + +def ZipToEpub(EpubName='a.epub'):#newCommitTag + epub = zipfile.ZipFile(os.path.abspath('../../'+os.curdir+u'/知乎答案集锦/'+EpubName),'w') + epub.write('mimetype', compress_type=zipfile.ZIP_STORED) + def Help_ZipToEpub(Dir='.'): + for p in os.listdir(Dir): + if p == EpubName or p == 'mimetype': + PrintInOneLine(u'该文件已添加,自动跳过') + continue + filepath = os.path.join(Dir,p) + if not os.path.isfile(filepath): + if p == '.' or p == '..': + continue + Help_ZipToEpub(Dir=filepath) + else: + PrintInOneLine( u'将{}添加至电子书内'.format(filepath).decode(encoding="utf-8",errors='ignore')) + epub.write(filepath, compress_type=zipfile.ZIP_STORED) + Help_ZipToEpub() + epub.close() +##########################################################新开始 +def DealAnswerDict(cursor=None,AnswerDict={},ImgList=[]):#必须是符合规定的Dict,规定附后 + for t in AnswerDict['AnswerList']: + Dict = {} + SelectAnswerList = cursor.execute("select * from AnswerInfoTable where Questionhref=?",(t,)).fetchone()#SQLTag + cursor.execute('select AnswerContent from AnswerContentTable where Questionhref=?',(t,)) + AnswerContent = cursor.fetchone()[0] + if SelectAnswerList==None: + AnswerDict[Dict['AnswerID']]={} + AnswerDict[Dict['AnswerID']]['HtmlStr'] = '' + AnswerDict[Dict['AnswerID']]['AgreeCount'] = 0 + AnswerDict['AgreeCount'] =0 + AnswerDict['Title'] =t + AnswerDict['HtmlStr'] ='Wrong' + continue + Dict['ID'] = SelectAnswerList[0] + Dict['Sign'] = SelectAnswerList[1] + Dict['AgreeCount'] = SelectAnswerList[2] + Dict['AnswerContent'] = closeimg(AnswerContent.replace('
','
').replace('
','
'),ImgList) + Dict['QuestionID'] = SelectAnswerList[3] + Dict['AnswerID'] = SelectAnswerList[4] + Dict['UpdateTime'] = SelectAnswerList[5] + Dict['CommitCount'] = SelectAnswerList[6] + Dict['QuestionTitle'] = SelectAnswerList[7] + Dict['Questionhref'] = SelectAnswerList[8] + Dict['UserName'] = SelectAnswerList[9] + if len(SelectAnswerList[10])>10:#话题界面下没有用户IDLogo + Dict['UserIDLogoAdress']= '../images/'+SelectAnswerList[10][-15:] + ImgList.append(SelectAnswerList[10]) + else : + Dict['UserIDLogoAdress']= '' + if len(Dict['Sign'])==0: + SignStr ='' + else: + SignStr =',%(Sign)s'%Dict + HtmlStr =u""" +
+
+ %(UserName)s + """%Dict+SignStr+u"""

+ %(AnswerContent)s +
+
+
+ 赞同:%(AgreeCount)s +
+

+ 评论:%(CommitCount)s +

+
+
+
+

+
+ """%Dict + AnswerDict[t]={} + AnswerDict[t]['HtmlStr'] = HtmlStr + AnswerDict[t]['AgreeCount'] = int(Dict['AgreeCount']) + + + if AnswerDict.has_key('AgreeCount'): + AnswerDict['AgreeCount'] += int(Dict['AgreeCount']) + if len(AnswerDict['Title'])==0 and len(Dict['QuestionTitle'])!=0: + AnswerDict['Title'] = Dict['QuestionTitle'] + else: + AnswerDict['AgreeCount'] = int(Dict['AgreeCount']) + AnswerDict['Title'] = Dict['QuestionTitle'] + if len(AnswerDict['Title'])!=0 and not AnswerDict.has_key('HtmlStr'): + AnswerDict['HtmlStr'] = u''' + + + + + + + + %(Title)s + + +

%(Title)s



\n'''%AnswerDict#生成答案头#这点内存占用量,主不在乎~哈哈#一会仿知乎日报调整下标题的大小,现在手机没电了,打不开 + if not AnswerDict.has_key('HtmlStr'):#如果到最后也没找到问题标题的话。。。 + AnswerDict['HtmlStr'] = u''' + + + + + + + + + + +



\n''' + #对答案进行排序#好吧,麻烦点 + SortList = [] + for t in AnswerDict['AnswerList']: + SortList.append((AnswerDict[t]['AgreeCount'],AnswerDict[t]['HtmlStr'])) + for t in sorted(SortList,key=lambda SortList:SortList[0],reverse=True): + AnswerDict['HtmlStr']+=t[1] + AnswerDict['HtmlStr']+='' + + +def MakeInfoDict(InfoDict={},TargetFlag=0): + Dict = {} + if TargetFlag==1: + Dict['BookTitle'] = InfoDict['Name']+u'的知乎回答集锦' + Dict['AuthorAddress'] = InfoDict['ID'] + Dict['AuthorName'] = InfoDict['Name'] + Dict['Description'] = InfoDict['Name']+u'的知乎回答集锦' + if TargetFlag==2: + Dict['BookTitle'] = u'知乎收藏之'+InfoDict['Title'] + Dict['AuthorAddress'] = InfoDict['CollectionID'] + Dict['AuthorName'] = InfoDict['AuthorName'] + Dict['Description'] = InfoDict['Description'] + if TargetFlag==4: + Dict['BookTitle'] = u'知乎话题精华之'+InfoDict['Title'] + Dict['AuthorAddress'] = InfoDict['TopicID'] + Dict['AuthorName'] = u'知乎' + Dict['Description'] = InfoDict['Description'] + return Dict + +def EpubBuilder(MaxThread=20): + cursor = returnCursor() + FReadList = open('ReadList.txt','r') + Mkdir(u"电子书制作临时资源库") + Mkdir(u'电子书制作临时资源库/知乎图片池') + Mkdir(u"知乎答案集锦") + for url in FReadList: + ImgList = []#清空ImgList + InfoDict = {} + IndexList = [] + AnswerDict = {}#初始化 + print url + url = url.replace("\r",'').replace("\n",'') + TargetFlag,TargetID = ChooseTarget(url) + if TargetFlag!=4 and TargetFlag!=2 and TargetFlag!=1: + continue + try : + IndexList = pickle.loads(cursor.execute('select Pickle from VarPickle where Var= ?',(url,)).fetchone()[0]) + InfoDict = pickle.loads(cursor.execute('select Pickle from VarPickle where Var= ?',(url+'InfoDict',)).fetchone()[0]) + except TypeError: + print u'该url未成功读取' + continue + InfoDict = MakeInfoDict(InfoDict=InfoDict,TargetFlag=TargetFlag) + os.chdir(u'电子书制作临时资源库') + BufDir = u'%(BookTitle)s(%(AuthorAddress)s)_电子书制作临时文件夹'%InfoDict + Mkdir(BufDir) + os.chdir(BufDir) + f = open('mimetype','w') + f.write(u'application/epub+zip') + f.close() + Mkdir('META-INF') + Mkdir('OEBPS') + os.chdir(u'./'+'OEBPS') + Mkdir('html') + Mkdir('images') + os.chdir('..') + print u'文件目录创建完毕' + #文件目录创建完毕 + + #先生成目录与正文 + AnswerDict = {} + for t in IndexList: + QuestionID = int(re.search(r'(?<=http://www.zhihu.com/question/)\d*?(?=/answer/)',t).group(0)) + if AnswerDict.has_key(QuestionID) : + #存在该键值 + AnswerDict[QuestionID]['AnswerList'].append(t)#记录答案链接,稍后进行进一步处理 + else : + AnswerDict[QuestionID] = {} + AnswerDict[QuestionID]['AnswerList'] = [] + AnswerDict[QuestionID]['AnswerList'].append(t) + SortList = [] + DictNo = 0#为了输出更好看一些 + DictCountNo = len(AnswerDict) + for t in AnswerDict: + DictNo+=1 + PrintInOneLine(u'正在处理第{}个回答共{}个'.format(DictNo,DictCountNo)) + DealAnswerDict(cursor=cursor,ImgList=ImgList,AnswerDict=AnswerDict[t]) + SortList.append((t,AnswerDict[t]['AgreeCount'])) + #开始输出目录与文件 + print u'答案处理完成,开始输出文件' + TitleHtml = open("./OEBPS/html/title.html",'w') + TitleHtml.write(u''' + + + + + + + + 目录 + + +

目录



\n
    ''') + No = 1 + Ncx= u''' + + 目录 + + + ''' + Mainfest='' + Spine='' + DictNo = 0 + DictCountNo = len(SortList) + for t in sorted(SortList,key=lambda SortList:SortList[1],reverse=True): + DictNo += 1 + PrintInOneLine(u'正在输出第{}个文件,共{}个'.format(DictNo,DictCountNo)) + No+=1 + TitleStr = AnswerDict[t[0]]['Title'] + Ncx +=u' {title} \n'.format(title=TitleStr,No=No) + Mainfest+=u'\n'.format(No=No) + Spine +=u'\n'.format(No=No) + TitleHtml.write(u"""
  1. {Title}
  2. \n""".format(No=No,Title=TitleStr))#添加了一条隐藏下划线的设定 + f = open(u'./OEBPS/html/chapter{}.html'.format(No),'w') + f.write(AnswerDict[t[0]]['HtmlStr']) + f.close() + Ncx +="
    " + + TitleHtml.write(u"""
\n""") + TitleHtml.close() + + CreateOPF(InfoDict,Mainfest,Spine) + CreateNCX(InfoDict,Ncx) + f = open('./META-INF/container.xml','w') + f.write(''' + + + + + ''')#元文件 + f.close() + #临时创建一个封面文件 + f= open("OEBPS/html/cover.html","w") + + if(InfoDict['Description']==''): + Description ='' + else: + Description ='''
+

%(Description)s

'''%InfoDict + coverHtmlStr = ''' + + + + + + + + %(BookTitle)s + + +
+ +
\n +

%(BookTitle)s

+
+

%(AuthorName)s

'''%InfoDict+Description+''' +
+ 知识共享许可协议 + +
+
本作品采用知识共享署名-非商业性使用-禁止演绎 3.0 中国大陆许可协议进行许可。
+ + + '''%InfoDict + f.write(coverHtmlStr) + f.close() + print u'答案生成完毕' + #输出链接,反正最多就三四万个。。。 + print u'开始下载图片' + #复制CSS与cover两个文件到临时文件夹中 + + for root,target,flag in [ + (os.path.abspath('../../'+os.curdir+u'/电子书制作资源文件夹/cover.png') ,u'OEBPS/images/cover.png' ,False) + , (os.path.abspath('../../'+os.curdir+u'/电子书制作资源文件夹/88x31.png') ,u'OEBPS/images/88x31.png' ,False) + , (os.path.abspath('../../'+os.curdir+u'/电子书制作资源文件夹/stylesheet.css'),u'OEBPS/stylesheet.css' ,True)]: + CopyFile(root=root,TargetFile=target,flag=flag) + + DownloadPicWithThread(ImgList,MaxThread=MaxThread) + ZipToEpub(InfoDict['BookTitle']+'.epub') + os.chdir('..') + os.chdir('..')#回到元目录 + PrintInOneLine('') + PrintInOneLine( u'\n%(BookTitle)s制作完成\n'%InfoDict+'\n') + + diff --git "a/\346\272\220\344\273\243\347\240\201/ZhihuEpub_Zhuanlan.py" "b/\346\272\220\344\273\243\347\240\201/ZhihuEpub_Zhuanlan.py" new file mode 100755 index 0000000..59374f8 --- /dev/null +++ "b/\346\272\220\344\273\243\347\240\201/ZhihuEpub_Zhuanlan.py" @@ -0,0 +1,337 @@ +# -*- coding: utf-8 -*- +import json +from ZhihuEpub import CheckImgFileExist, DownloadPicWithThread , returnCursor , Mkdir , CreateMimeType , CreateContainer_XML , returnTagContent , removeTag , removeAttibute , closeimg , PixName , fixPic , DownloadImg , CreateOPF , CreateNCX , PrintDict , ZipToEpub ,PrintInOneLine,CopyFile,OpenUrl,ErrorReportText#复用。。。 + +import sys +reload( sys ) +sys.setdefaultencoding('utf-8') +###############头文件 + +import re +import urllib2 +import HTMLParser +import re +import zlib +import threading +import time +import datetime +import HTMLParser#HTML解码< +import json#在returnPostHeader中解析Post返回值 +import os#打开更新页面 +import urllib#编码请求字串,用于处理验证码 +import sqlite3#数据库! +#数据库部分 +import pickle +import socket#捕获Timeout错误 + +########################################################### + + +#################### +def ChooseTarget(url=''):#选择 + try : + return re.search(r'(?<=zhuanlan.zhihu.com/)[^/]*',url).group(0) + except AttributeError: + print u'未能匹配到专栏名' + return '' +######新修改 +def DealAnswerDict(JsonDict=[],ImgList=[],JsonDictList=[]):#必须是符合规定的Dict,规定附后 + for k in JsonDict: + t = k + Dict={} + Dict['ColumnID'] = t["column"]["slug"]#专栏ID + Dict['ColumnName'] = t["column"]["name"]#专栏名 + Dict['ArticleLink'] = t['links']['comments'] + Dict['TitleImage'] = t["titleImage"] + Dict['ArticleTitle'] = t["title"] + Dict['AuthorName'] = t['author']['name'] + Dict['AuthorIDLink'] = t['author']['profileUrl']#全地址 + Dict['PublishedTime']= t["publishedTime"] + Dict['Commit'] = t["commentsCount"] + Dict['Agree'] = t["likesCount"] + Dict['Content'] = t["content"] + Buf_AuthorID = t['author']['avatar']['id'] + Buf_AuthorTemplete = t['author']['avatar']['template'] + Dict['AuthorIDLogo'] = Buf_AuthorTemplete.format(id=Buf_AuthorID,size='s') + + HtmlStr =u""" + + + + + + + + %(ArticleTitle)s + + +
+ +
+
+

%(ArticleTitle)s

+
+
+
+ %(AuthorName)s +

+ %(Content)s +
+
+
+ 赞同:%(Agree)s +
+

+ 评论:%(Commit)s +

+
+
+
+

+
+ + """%Dict + Dict['HtmlStr'] = closeimg(text=HtmlStr.replace('
','
').replace('
','
'),ImgList=ImgList)#需要进一步处理#testTag + JsonDictList.append(Dict)#按发布顺序排序 + +def MakeInfoDict(ColumnInfoDict={}): + Dict = {} + Dict['BookTitle'] = u'知乎专栏之'+ColumnInfoDict['Name'] + Dict['AuthorAddress'] = ColumnInfoDict['Href'] + Dict['AuthorName'] = ColumnInfoDict['Name'] + Dict['Description'] = ColumnInfoDict['Description'] + return Dict + +def OpenUrl_Zhuanlan(url=""): + Time = 0 + Content = '' + while Time<10: + Content = OpenUrl(urllib2.Request(url=url),Timeout=30)#捕捉IOError错误的任务放在外层,以便及时跳出循环 + if Content == '': + Time+=1 + time.sleep(1)#休息一秒后再尝试打开 + print u'第({}/10)次尝试打开页面'.format(Time) + else : + return Content + print u'10次尝试全部失败,目标网址={} ,请检查网络链接或网址是否正确'.format(url) + return Content + +def ErrorReturn(ErrorInfo=""):#返回错误信息并退出,错误信息要用unicode编码 + print ErrorInfo + print u"点按回车继续" + raw_input() +def setMaxThread(): + try: + MaxThread=int(raw_input()) + except ValueError as e : + print e + print u'貌似输入的不是数...最大线程数重置为20,点击回车继续运行' + MaxThread=20 + raw_input() + if MaxThread>200 or MaxThread<1: + if MaxThread>200: + print u"线程不要太大好伐\n你线程开的这么凶残你考虑过知乎服务器的感受嘛" + else: + print u"不要输负数啊我去" + print u"最大线程数重置为20" + MaxThread=20 + print u'猛击回车继续~' + raw_input() + return MaxThread + +def ZhihuHelp_Epub(MaxThread=20): + ErrorReportText(flag=False) + FReadList = open('ReadList.txt','r') + Mkdir(u"电子书制作临时资源库") + Mkdir(u'电子书制作临时资源库/知乎图片池') + Mkdir(u"知乎答案集锦") + ErrorUrlList = [] + for url in FReadList: + ImgList = []#清空ImgList + InfoDict = {} + JsonDict = []#初始化 + print u'待抓取链接:',url + url = url.replace("\r",'').replace("\n",'') + Target = ChooseTarget(url) + if Target!='': + TargetUrl = 'http://zhuanlan.zhihu.com/api/columns/'+Target+'/posts?limit=20000&offset=0' + InfoTargetUrl = 'http://zhuanlan.zhihu.com/api/columns/'+Target + else: + continue + #专栏信息 + print u'开始获取专栏信息' + try: + t = OpenUrl_Zhuanlan(url=InfoTargetUrl) + if t=='': + print u'获取专栏信息失败' + ErrorReportText(u'获取专栏信息失败'+InfoTargetUrl) + continue + except IOError as e: + print u'解析专栏内容时出错' + ErrorReportText(u'解析专栏内容时出错'+str(e)) + continue + except ValueError as e: + print u'专栏不存在或知乎服务器拒绝访问' + ErrorReportText(u'专栏不存在或知乎服务器拒绝访问'+str(e)) + continue + + InfoDict = json.loads(t) + ColumnInfoDict = {} + ColumnInfoDict["FollowersCount"] = InfoDict["followersCount"] + ColumnInfoDict["Description"] = InfoDict["description"] + ColumnInfoDict["Name"] = InfoDict["name"] + ColumnInfoDict["Href"] = Target + InfoDict = MakeInfoDict(ColumnInfoDict) + + #专栏全文 + print u'开始获取专栏内容' + try: + t = OpenUrl_Zhuanlan(url=TargetUrl)#在专栏内容过多时会失效(有效下载时间只有3s左右,200k网速下只能下载300篇文章) + if t=="": + print u'专栏内容没有抓到'+InfoTargetUrl + ErrorReportText(u'错误原因:专栏内容没有抓到'+InfoTargetUrl) + continue + except IOError as e: + print u'解析专栏内容时出错' + ErrorReportText(u'解析专栏内容时出错'+str(e)) + continue + except ValueError as e: + print u'专栏不存在或知乎服务器拒绝访问' + ErrorReportText(u'专栏不存在或知乎服务器拒绝访问'+str(e)) + continue + + JsonDict = json.loads(t) + JsonDictList= [] + DealAnswerDict(JsonDict=JsonDict,ImgList=ImgList,JsonDictList=JsonDictList) + + + os.chdir(u'电子书制作临时资源库') + BufDir = u'%(BookTitle)s(%(AuthorAddress)s)_电子书制作临时文件夹'%InfoDict + Mkdir(BufDir) + os.chdir(BufDir) + f = open('mimetype','w') + f.write(u'application/epub+zip') + f.close() + Mkdir('META-INF') + Mkdir('OEBPS') + os.chdir(u'./'+'OEBPS') + Mkdir('html') + Mkdir('images') + os.chdir('..') + print u'文件目录创建完毕' + #文件目录创建完毕 + + #开始输出目录与文件 + print u'答案处理完成,开始输出文件' + TitleHtml = open("./OEBPS/html/title.html",'w') + TitleHtml.write(u''' + + + + + + + + 目录 + + +

目录



\n
    ''') + No = 1 + Ncx= u''' + + 目录 + + + ''' + Mainfest='' + Spine='' + DictNo = 0 + DictCountNo = len(JsonDictList) + for t in JsonDictList: + DictNo += 1 + if DictNo%10==0: + print u'正在输出第{}个文件,共{}个'.format(DictNo,DictCountNo) + No+=1 + TitleStr = t['ArticleTitle'] + Ncx +=u' {title} \n'.format(title=TitleStr,No=No) + Mainfest+=u'\n'.format(No=No) + Spine +=u'\n'.format(No=No) + + TitleHtml.write(u"""
  1. {Title}
  2. \n""".format(No=No,Title=TitleStr)) + f = open(u'./OEBPS/html/chapter{}.html'.format(No),'w') + f.write(t['HtmlStr']) + f.close() + Ncx +="
    " + + + TitleHtml.write(u"""
\n""") + TitleHtml.close() + + + CreateOPF(InfoDict,Mainfest,Spine) + CreateNCX(InfoDict,Ncx) + f = open('./META-INF/container.xml','w') + f.write(''' + + + + + ''')#元文件 + f.close() + #临时创建一个封面文件 + f= open("OEBPS/html/cover.html","w") + if(InfoDict['Description']==''): + Description ='' + else: + Description ='''
+

%(Description)s

'''%InfoDict + coverHtmlStr = ''' + + + + + + + + %(BookTitle)s + + +
+ +
\n +

%(BookTitle)s

+
+

%(AuthorName)s

'''%InfoDict+Description+''' +
+ 知识共享许可协议 + +
+
本作品采用知识共享署名-非商业性使用-禁止演绎 3.0 中国大陆许可协议进行许可。
+ + + '''%InfoDict + f.write(coverHtmlStr) + f.close() + print u'文集生成完毕' + #输出链接,反正最多就三四万个。。。 + ImgList = list(set(ImgList)) + #复制CSS与cover两个文件到临时文件夹中 + #print os.path.abspath('../../'+os.curdir+'/电子书制作资源文件夹/cover.jpg') + for root,target,flag in [ + (os.path.abspath('../../'+os.curdir+u'/电子书制作资源文件夹/cover.png') ,u'OEBPS/images/cover.png' ,False) + , (os.path.abspath('../../'+os.curdir+u'/电子书制作资源文件夹/88x31.png') ,u'OEBPS/images/88x31.png' ,False) + , (os.path.abspath('../../'+os.curdir+u'/电子书制作资源文件夹/stylesheet.css'),u'OEBPS/stylesheet.css' ,True)]: + CopyFile(root=root,TargetFile=target,flag=flag) + print u'开始下载图片' + DownloadPicWithThread(ImgList,MaxThread=MaxThread) + ZipToEpub(InfoDict['BookTitle']+'.epub') + os.chdir('..') + os.chdir('..')#回到元目录 + PrintInOneLine('\n'+u'%(BookTitle)s制作完成'%InfoDict+'\n') + print u'恭喜,所有电子书制作完成\n未成功打开的页面已输出至『未成功打开的页面.txt』中\n点按回车退出' + raw_input() + exit() +print u'请设置下载图片时的最大线程数\n线程越多速度越快,但线程过多会导致知乎服务器故障导致图片下载失败,默认最大线程数为20\n请输入一个数字(1~50),回车确认' +MaxThread = setMaxThread() +ZhihuHelp_Epub(MaxThread) diff --git "a/\346\272\220\344\273\243\347\240\201/ZhihuHelp.py" "b/\346\272\220\344\273\243\347\240\201/ZhihuHelp.py" new file mode 100755 index 0000000..ca623cc --- /dev/null +++ "b/\346\272\220\344\273\243\347\240\201/ZhihuHelp.py" @@ -0,0 +1,722 @@ +# -*- coding: utf-8 -*- +import urllib2 +import re +import zlib +import threading +import time +import datetime +import HTMLParser#HTML解码< +import json#在returnPostHeader中解析Post返回值 +import os#打开更新页面 + +import urllib#编码请求字串,用于处理验证码 + + +import sys#修改默认编码 +reload( sys ) +sys.setdefaultencoding('utf-8') + + + +import sqlite3#数据库! + +########################################################### +#数据库部分 +import pickle +import socket#捕获Timeout错误 +##################Epub######################################## +from ZhihuEpub import * +########################################################### +#所有可复用的函数均已转移至Epub文件内 +######################网页内容分析############################ +#个人答案页面、收藏夹页面答案连接提取 +#由returnAnswerList返回提取的答案链接列表,格式:['/question/21354/answer/15488',] +#网页答案抓取 +def FetchMaxAnswerPageNum(Content=""):#简单搜索比正则更快#OKTag + try: + Pos = Content.index(u'">下一页') + RightPos = Content.rfind(u"",0,Pos) + LeftPos = Content.rfind(u">",0,RightPos) + MaxPage = int(Content[LeftPos+1:RightPos]) + print u"答案列表共计{}页".format(MaxPage) + return MaxPage + except: + print u"答案列表共计1页" + return 1 +#答案信息读取 +def ThreadWorker(cursor=None,MaxThread=200,RequestDict={},Flag=1):#newCommitTag + + MaxPage = len(RequestDict) + ReDict = returnReDict() + AnswerDictList=[]#储存Dict,一并执行SQL + html_parser=HTMLParser.HTMLParser() + ThreadList=[] + Times = 0 + ErrorCount = 0 + LoopFlag = True + + for Page in range(MaxPage): + ThreadList.append(threading.Thread(target=WorkForFetchUrl,args=(ReDict,html_parser,RequestDict,Page,AnswerDictList,Flag))) + + while Times<10 and LoopFlag: + print u'开始第{}遍抓取,本轮共有{}张页面待抓取,共尝试10遍'.format(Times+1,len(ThreadList)) + for Page in range(MaxPage): + if threading.activeCount()-1 < MaxThread:#实际上是总线程数 + ThreadList[Page].start()#有种走钢丝的感觉。。。 + else : + PrintInOneLine(u'正在读取答案页面,线程库中还有{}条线程等待运行'.format(MaxPage-Page)) + time.sleep(1) + ThreadLiveDetect(ThreadList) + + LoopFlag = False + MaxPage = 0 + ThreadList = [] + for t in RequestDict: + if RequestDict[t][1]==False: + ThreadList.append(threading.Thread(target=WorkForFetchUrl,args=(ReDict,html_parser,RequestDict,t,AnswerDictList,Flag))) + MaxPage += 1 + LoopFlag = True + Times += 1 + if LoopFlag: + print u'第{}遍答案抓取执行完毕,{}张页面抓取失败,3秒后进行下一遍抓取'.format(Times+1,ErrorCount) + time.sleep(3) + 
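# A minimal, self-contained sketch of the download strategy ThreadWorker above
# relies on: cap the number of worker threads and re-queue pages whose fetch
# failed, for up to ten passes. The sketch targets Python 3 and its
# standard-library thread pool; fetch_page is a hypothetical stand-in for
# WorkForFetchUrl, not part of the original code.
import random
from concurrent.futures import ThreadPoolExecutor, as_completed

def fetch_page(page_no):
    """Pretend fetch: fail occasionally so the retry loop has something to do."""
    if random.random() < 0.2:
        raise IOError("fetch failed for page {0}".format(page_no))
    return "content of page {0}".format(page_no)

def fetch_all(pages, max_threads=20, max_rounds=10):
    pending, results = list(pages), {}
    for _ in range(max_rounds):
        if not pending:
            break
        failed = []
        with ThreadPoolExecutor(max_workers=max_threads) as pool:
            futures = {pool.submit(fetch_page, p): p for p in pending}
            for future in as_completed(futures):
                page = futures[future]
                try:
                    results[page] = future.result()
                except IOError:
                    failed.append(page)  # re-queue the page for the next pass
        pending = failed
    return results, pending  # `pending` holds pages that never succeeded

if __name__ == "__main__":
    ok, still_failing = fetch_all(range(1, 21))
    print("{0} pages fetched, {1} failed for good".format(len(ok), len(still_failing)))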
DictNo = 0#美化输出 + DictCountNo = len(AnswerDictList) + for Dict in AnswerDictList: + DictNo += 1 + PrintInOneLine(u'正在将第{}/{}个答案存入数据库中'.format(DictNo,DictCountNo)) + AppendDictIntoDataBase(cursor,Dict) + return +def SaveCollectionIndexIntoDB(RequestDict={},CollectionID=0,cursor=None):#PassTag + AnswerList = [] + for t in RequestDict: + try: + RequestDict[t][0].get_full_url() + except AttributeError: + for i in RequestDict[t][0]: + AnswerList.append(i) + for i in AnswerList: + rowcount = cursor.execute('select count(CollectionID) from CollectionIndex where CollectionID=? and Questionhref=?',(CollectionID,i)).fetchone()[0] + if rowcount == 0: + cursor.execute('insert into CollectionIndex (CollectionID,Questionhref) values (?,?) ',(CollectionID,i)) + else : + pass + return +def AppendDictIntoDataBase(cursor=None,Dict={}) : #假定已有数据库#PassTag + bufDict = Dict + bufAnswerContent = bufDict['AnswerContent'] + del bufDict['AnswerContent'] + SaveToDB(cursor=cursor,NeedToSaveDict=bufDict,primarykey='Questionhref',TableName='AnswerInfoTable') + bufDict = {} + bufDict['AnswerContent'] = bufAnswerContent + bufDict['Questionhref'] = Dict['Questionhref'] + SaveToDB(cursor=cursor,NeedToSaveDict=bufDict,primarykey='Questionhref',TableName='AnswerContentTable') + return +def CheckUpdate():#检查更新,强制更新#newCommitTag + + print u"检查更新。。。" + try: + UpdateTime = urllib2.urlopen(u"http://zhihuhelpbyyzy-zhihu.stor.sinaapp.com/ZhihuHelpUpdateTime.txt",timeout=10) + except: + return + Time = UpdateTime.readline().replace(u'\n','').replace(u'\r','') + url = UpdateTime.readline().replace(u'\n','').replace(u'\r','') + UpdateComment= UpdateTime.read()#可行? + if Time=="2014-06-28": + return + else: + print u"发现新版本,\n更新说明:{}\n更新日期:{} ,点按回车进入更新页面".format(UpdateComment,Time) + print u'新版本下载地址:'+url + raw_input() + import webbrowser + webbrowser.open_new_tab(url) + return + +def returnReDict():#返回编译好的正则字典#Pass + Dict = {} + Dict['_Collection_QusetionTitle'] = re.compile(r'(?<=href="/question/\d{8}">).*?(?=)') + Dict['_QusetionTitle'] = re.compile(r'(?<=href="/question/\d{8}/answer/\d{8}">).*?(?=)') + Dict['_AnswerContent'] = re.compile(r'(?<=)') + Dict['_AgreeCount'] = re.compile(r'(?<=data-votecount=")\d*(?=">)') + Dict['_QuestionID'] = re.compile(r'(?<=)') + Dict['_Questionhref'] = re.compile(r'(?<=)') + Dict['_UpdateTime'] = re.compile(r'(?<=).*(?=)')#分为13:25、昨天 00:26、2013-05-07三种情况,需进一步处理 + Dict['_CommitCount'] = re.compile(r'(?<=).*?(?= )')#若转化为int失败则是添加评论#即为0条 + Dict['_ID'] = re.compile(r'(?<=).*(?=
)') + Dict['_Sign'] = re.compile(r'(?<=)') + Dict['_NoRecord'] = re.compile(r' 禁止转载')#怎么用? + Dict['_UserIDLogoAdress'] = re.compile(r'(?<=src=")http://p\d\.zhimg\.com[/\w]{7}[_\w]{11}\.jpg(?="class="zm-list-avatar)') + return Dict + +def ReadAnswer(ReDict,html_parser,LastDict,text="",Flag=1):#UnitTest#newCommitTag + Dict={} + Dict["ID"] = "" ## + Dict["Sign"] = ""# + Dict["AgreeCount"] = 0# + Dict["CommitCount"] = 0# + Dict["QuestionID"] = ""# + Dict["AnswerID"] = ""# + Dict["UpdateTime"] = "1970-01-01"# + Dict["QuestionTitle"] = "" ## + Dict["Questionhref"] = ""# + Dict["AnswerContent"] = "" ## + Dict["UserName"] = "ErrorName" ## + Dict['UserIDLogoAdress']= ''# + if text=='': + return Dict + try :#检测禁止转载 + ReDict['_NoRecord'].search(text).group(0) + return Dict + except : + pass + + def Help_ReadAnswer(t="",flag=True): + u""" + #辅助系函数 + * 用于提取答案内容,提高代码复用度 + * 因为某些项匹配失败后应直接调用默认值,所以添加了Flag以做区分 + """ + try: + Dict[t] = ReDict['_'+t].search(text).group(0) + except AttributeError: + if flag: + print u"没有收集到" + t + print u"知乎页面结构已变动,程序无法正常运行,快上知乎@姚泽源喊他更新脚本" + return False + else : + pass + return True + + #特殊处理 + try: + Dict["AnswerContent"] = html_parser.unescape(ReDict['_AnswerContent'].search(text).group(0)).encode("utf-8") + except AttributeError: + print u"答案内容没有收集到" + print u"知乎页面结构已变动,程序无法正常运行,快上知乎@姚泽源喊他更新脚本" + return Dict + + if Flag==1: + try : + Dict["QuestionTitle"] = ReDict['_QusetionTitle'].search(text).group(0) + except AttributeError: + Dict["QuestionTitle"] = LastDict["QuestionTitle"] + else : + try : + Dict["QuestionTitle"] = ReDict['_Collection_QusetionTitle'].search(text).group(0) + except AttributeError: + Dict["QuestionTitle"] = LastDict["QuestionTitle"] + try : + ID = ReDict['_ID'].search(text).group(0) + _UserName = re.compile(r'(?<=).*?(?=)')# 这里必须要用到已经捕获的ID,否则没法获得用户名 + Dict["UserName"] = _UserName.search(text).group(0) + except AttributeError : + try :#对应于知乎用户与匿名用户两种情况 + Dict["UserName"] = ReDict['_UnSuccessName'].search(text).group(0) + ID = '404NotFound!' + except AttributeError: + Dict["UserName"] = u"知乎用户" + ID = 'ZhihuUser!' + + Dict["ID"] = ID + + #常规匹配 + #时间放到最后,因为要靠时间验证是否匹配成功 + for t in ["AgreeCount","QuestionID","AnswerID","UpdateTime","Questionhref"]: + if Help_ReadAnswer(t): + pass + else: + return Dict + + for t in ['UserIDLogoAdress','Sign','CommitCount']: + if Help_ReadAnswer(t,False): + pass + else: + return Dict + + Dict["Questionhref"] = 'http://www.zhihu.com'+Dict["Questionhref"] + + if len(Dict["UpdateTime"])!=10 : + if len(Dict["UpdateTime"])!=5 : + Dict["UpdateTime"] = time.strftime(u'%Y-%m-%d',time.localtime(time.time()-86400))#昨天 + else : + Dict["UpdateTime"] = time.strftime(u'%Y-%m-%d',time.localtime(time.time()))#今天 + + return Dict + +def WorkForFetchUrl(ReDict={},html_parser=None,RequestDict={},Page=0,AnswerDictList=[],Flag=1):#抓取回答链接#注意,Page是字符串#Pass + print u"正在抓取第{}页上的答案".format(Page+1) + AnswerList = [] + try : + k = OpenUrl(RequestDict[Page][0]).decode(encoding='utf-8',errors='ignore')#文本内容必须要经过编码,否则会导致搜索时出现故障 + except ValueError as e:#对于40X错误不再继续读取 + print e + ErrorReportText(Info=u'读取答案内容出错\t:\t'+str(e)) + RequestDict[Page][1]=True + return + except IOError as e :#解压缩错误 + print e + return + if k=='': + return + if Flag==4: + k = k.split('
')[0] + Dict = ReadAnswer(ReDict,html_parser,Dict,k[t].replace('\r',"").replace('\n',"").decode(encoding="utf-8",errors='ignore'),Flag)#使用的是单行模式,所以要去掉\r\n避免匹配失败 + if Dict['UpdateTime']!='1970-01-01': + AnswerDictList.append(Dict) + AnswerList.append(Dict['Questionhref']) + print u'第{}页答案抓取成功'.format(Page+1) + if RequestDict[Page][1]==False:#答案列表储存于RequesDict中 + RequestDict[Page][0]=AnswerList + RequestDict[Page][1]=True + return + +def Login(cursor=None,UserID='mengqingxue2014@qq.com',UserPassword='131724qingxue'):#newCommitTag + qc_1 = ''#初始化 + print u'开始验证网页能否打开,验证完毕后将开始登陆流程,请稍等。。。' + header = { +'Accept' : '*/*' +,'Accept-Encoding' :'gzip,deflate,sdch' +,'Accept-Language' :'zh,zh-CN;q=0.8,en-GB;q=0.6,en;q=0.4' +,'Connection' :'keep-alive' +,'Host' :'www.zhihu.com' +,'Content-Type':'application/x-www-form-urlencoded; charset=UTF-8' +,'DNT':'1' +,'User-Agent':'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36' +,'X-Requested-With':'XMLHttpRequest' +} + try : + ZhihuFrontPage=urllib2.urlopen(u"http://www.zhihu.com")#这里也可能出错#初次打开zhihu.com,获取xsrf信息 + except urllib2.HTTPError as e : + print u'服务器错误' + print u'错误内容',str(e).decode("utf-8") + print u'转为使用旧有PostHeader' + return OldPostHeader(cursor=cursor) + except urllib2.URLError as e : + print u'网络错误' + print u'错误内容',str(e).decode("utf-8") + print u'话说网络链接正常不?' + print u'转为使用旧有PostHeader' + return OldPostHeader(cursor=cursor) + try : + xsrf = '_xsrf=' + re.search(r'(?<=name="_xsrf" value=")[^"]*(?="/>)',ZhihuFrontPage.read()).group(0) + #print xsrf + except AttributeError: + ErrorReturn(u'xsrf读取失败,程序出现致命故障,无法继续运行。\n错误信息:知乎的登陆验证方式可能已更改,无法在返回的cookie中正则匹配到xsrf,请知乎@姚泽源更新脚本') + #except KeyError:#有这个错误? + # ErrorReturn( u'知乎没有设置xsrf\n可能登陆流程已修改,请知乎私信@姚泽源更新软件,不胜感激~') + + header['Cookie'] = xsrf+';l_c=1' + header['Origin'] = 'http://www.zhihu.com'#妈蛋知乎改登陆方式了这个坑坑了我整整两天!!! + header['Referer'] = 'http://www.zhihu.com/' + + print u'网页验证完毕,开始登陆流程' + if UserID == 'mengqingxue2014@qq.com': + UserID,UserPassword = InputUserNameandPassword() + AskRemberFlag = True + else : + AskRemberFlag = False + print u'可以通过使用记事本打开setting.ini文件修改用户名与密码来更换登录帐号' + MaxTryTime = 0#最多重复三次,三次后自动切换为使用旧有cookie进行登录 + try: + while MaxTryTime<10: + LoginData = urllib.quote('{0}&email={1}&password={2}&rememberme=y'.format(xsrf,UserID,UserPassword),safe='=&') + + request = urllib2.Request(url='http://www.zhihu.com/login',data=LoginData,headers=header) + try : + buf = urllib2.urlopen(request) + except urllib2.HTTPError as e :#还可能会有403/500错误 + print u'服务器错误' + print u'错误内容',e + print u'话说网络链接正常不?' + print u'转为使用旧有Header' + return OldPostHeader(cursor=cursor) + except urllib2.URLError as e : + print u'网络错误' + print u'错误内容',e + print u'话说网络链接正常不?' 
+ print u'转为使用旧有PostHeader' + return OldPostHeader(cursor=cursor) + if qc_1 == '': + try :#如果是初次打开网页的话,登陆成功之后会一并返回qc_1与qc_0,都有用 + qc_1 = re.search(r'(q_c1=[^;]*)',buf.info()['set-cookie']).group(0) + except AttributeError: + ErrorReturn(u'qc_1读取失败,程序出现致命故障,无法继续运行。\n错误信息:知乎登陆流程可能已更改,无法在返回的cookie中正则匹配到qc_1,请知乎@姚泽源更新脚本') + qc_1 = '' + try: + qc_0 = re.search(r'(q_c0=[^;]*)',buf.info()['set-cookie']).group(0) + except AttributeError: + qc_0 = '' + + header['Cookie'] = qc_1 +';' +xsrf+'; l_c=1'+';'+qc_0 + buf_read = buf.read()#为什么只能读取一次???#info可以读取多次 + PostInfo = json.loads(buf_read) + if PostInfo['errcode']==269:#提示输入验证码#验证码错误是270#登陆成功不返回任何信息,所以会报错,测试一下#也可能是该用户尚未注册 + print u'抱歉,错误代码269\n知乎返回的错误信息如下:\n-----------------begin---------------------' + PrintDict(PostInfo) + print '------------------end----------------------' + ErrorReturn(u'表示无法处理这样的错误\n如果是需要输入验证码的话请用网页登陆一次知乎之后再来吧~(注:私人账号在网页上成功登陆一次之后就不会再出现验证码了)') + else : + if PostInfo['errcode']==270: + try : + print PostInfo['msg']['captcha'].encode('gbk')#win下要编码成gbk, + print u'验证码错误?什么情况。。。' + ErrorReturn(u'好了现在需要输入验证码了。。。命令行界面表示显示图片不能。。。\n请用网页登陆一次知乎之后再来吧~(注:私人账号在网页上成功登陆一次之后就不会再出现验证码了)') + except KeyError: + print u'用户名或密码不正确,请重新输入用户名与密码\n附注:知乎返回的错误信息见下' + PrintDict(PostInfo) + UserID,UserPassword = InputUserNameandPassword() + AskRemberFlag = True + print u'再次尝试登陆。。。' + MaxTryTime +=1 + else : + if MaxTryTime>=3: + print '三次尝试失败,转为使用已有cookie进行登录' + return OldPostHeader(cursor=cursor) + print u'未知错误,尝试重新登陆,请重新输入用户名与密码\nPS:知乎返回的错误信息:' + PrintDict(PostInfo) + UserID,UserPassword = InputUserNameandPassword() + AskRemberFlag = True + print u'再次尝试登陆。。。' + MaxTryTime +=1 + except KeyError: + print u'登陆成功!' + print u'登陆账号:',UserID + if AskRemberFlag: + print u'请问是否需要记住帐号密码?输入yes记住,输入其它任意字符跳过,回车确认' + if raw_input() == 'yes' : + Setting(ReadFlag=False,ID=UserID,Password=UserPassword) + print u'帐号密码已保存,可通过修改setting.ini进行修改密码等操作'#待添加修改帐号密码部分 + else: + print u'跳过保存环节,进入下一流程' + NewHeader = (str(datetime.date.fromtimestamp(time.time()).strftime('%Y-%m-%d')),header['Cookie'])#Time和datetime模块需要导入 + + SaveDict = {} + SaveDict['Var'] = 'PostHeader' + SaveDict['Pickle'] = pickle.dumps(NewHeader) + SaveToDB(cursor=cursor,NeedToSaveDict=SaveDict,primarykey='Var',TableName='VarPickle') + return header + #提取qc_0,储存之 +def OldPostHeader(cursor=None):#可以加一个网络更新cookie的功能#Pass + header = { +'Accept' : '*/*' +,'Accept-Encoding' :'gzip,deflate,sdch' +,'Accept-Language' :'zh,zh-CN;q=0.8,en-GB;q=0.6,en;q=0.4' +,'Connection' :'keep-alive' +,'Host' :'www.zhihu.com' +,'User-Agent':'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36' +} + + rowcount = cursor.execute('select count(Pickle) from VarPickle where Var="PostHeader"').fetchone()[0] + if rowcount==0: + List = ('2014-05-26', '_xsrf=9747077ec0374d469c91d06f4bf78c4d; q_c1=a5702f2ffc0344ae91e9efc0874012a8|1401117498000|1394290295000; q_c0="NTc1Mjk3OTkxMmM1NzU1N2MzZGQ5ZTMzMzRmNWVlMDR8MW9xU3hPdDF4U29BQlc4Qg==|1401117516|4bccb71dbbdd69c36ee800ef20586a6060ab8559";')#黄中华的cookie + else: + List = pickle.loads(cursor.execute("select Pickle from VarPickle where Var='PostHeader'").fetchone()[0])#这种错误。。。真难发现啊 + recordtime = datetime.datetime.strptime(List[0],'%Y-%m-%d').date() + today = datetime.date.today() + diff = 30- (today - recordtime).days + if diff > 0: + print u'转为使用'+List[0]+u'的登陆记录进行登陆,可能无法读取私人收藏夹。距离该记录过期还有'+str(diff)+u'天,过期后程序将无法继续运行,成功使用账号密码登陆可将记录刷新' + header['Cookie'] = List[1] + else : + 
ErrorReturn(u'账号密码登录&登陆记录已过期\n程序继续无法运行\n请重新运行程序,尝试使用账号密码进行登录。\n倘若一直无法登陆的话请上知乎私信@姚泽源反馈bug,不胜感激') + return header +def InputUserNameandPassword():#UnitTest + print u'请输入您的登陆用户名(知乎注册邮箱),回车确认' + print u'示例:\n用户名:mengqingxue2014@qq.com\n密码:131724qingxue\nPS:别用这个示例账号。。。登不上。。。囧' + print u'请输入用户名,回车确认' + LoopFlag = True + while LoopFlag: + UserID = raw_input() + try : + re.search(r'\w+@[\w\.]{3,}',UserID).group(0) + except AttributeError: + print u'话说,输入的账号不规范啊' + print u'账号规范:1.必须是正确格式的邮箱\n2.邮箱用户名只能由数字、字母和下划线_构成\n3.@后面必须要有.而且长度至少为3位' + print u'范例:mengqingxue2014@qq.com\n5719asd@sina.cn' + print u'请重新输入账号,回车确认' + else: + LoopFlag = False + print u'OK,请输入密码,回车确认' + LoopFlag = True + while LoopFlag: + UserPassword = raw_input() + try : + re.search(r'.{6,}',UserPassword).group(0)#密码中可以有符号 + except AttributeError: + print u'话说,输入的密码不规范啊' + print u'密码规范:1.至少6位' + print u'范例:helloworldvia27149,9527zaizhihu~' + print u'请重新输入密码,回车确认' + else: + LoopFlag = False + print u'Ok,开始发送登陆请求' + return UserID,UserPassword + +def returnConnCursor():#Pass + if os.path.isfile('./ZhihuDateBase.db'): + conn = sqlite3.connect("./ZhihuDateBase.db") + conn.text_factory = str + cursor = conn.cursor() + else: + conn = sqlite3.connect("./ZhihuDateBase.db") + conn.text_factory = str + cursor = conn.cursor() + cursor.execute("create table VarPickle (Var varchar(255),Pickle varchar(50000),primary key (Var))") + cursor.execute("create table AnswerInfoTable ( ID varchar(255) not Null , Sign varchar(9000) not Null , AgreeCount int(11) not Null , QuestionID varchar(20) not Null , AnswerID varchar(20) not Null , UpdateTime date not Null , CommitCount int(11) not Null , QuestionTitle varchar(1000) not Null , Questionhref varchar(255) not Null , UserName varchar(255) not Null ,UserIDLogoAdress varchar(255) not Null, primary key(Questionhref))")#没有数据库就新建一个 + cursor.execute("create table AnswerContentTable (AnswerContent longtext not Null , Questionhref varchar(255) not Null , primary key(Questionhref))") + cursor.execute("create table CollectionIndex (CollectionID varchar(50) not Null,Questionhref varchar(255) not Null, primary key(CollectionID,Questionhref))")#负责永久保存收藏夹链接,防止丢收藏 + cursor.execute(''' +CREATE TABLE IDInfo (IDLogoAdress varchar(255) default "http://p1.zhimg.com/da/8e/da8e974dc_m.jpg",ID varchar(255) not Null, Sign varchar(255) default '',Name varchar(255) default '',Ask varchar(255) default '',Answer int default 0,Post int default 0,Collect int default 0,Edit int default 0,Agree int default 0,Thanks int default 0,Followee int default 0,Follower int default 0,Watched int default 0,primary key(ID)) + ''') + cursor.execute('create table CollectionInfo (CollectionID varchar(50) not Null,Title varchar(255),Description varchar(1000),AuthorName varchar(255),AuthorID varchar(255),AuthorSign varchar(255),FollowerCount int(20) not Null ,primary key(CollectionID))') + cursor.execute('create table TopicInfo (Title varchar(255),Adress varchar(255),LogoAddress varchar(255),Description varchar(3000),TopicID varchar(50),primary key (TopicID))') + conn.commit() + return conn,cursor +def CatchFrontInfo(ContentText='',Flag=0,Target=''): + + html_parser = HTMLParser.HTMLParser() + def rTC(text=''):#returnTrueContent + return html_parser.unescape(text) + + + if ContentText=='': + return# 应该raise个错误出去 + print u'开始读取答案首页信息。。。' + InfoDict={} + if Flag ==0: + return InfoDict + if Flag ==1:#1,ID;2,Collect;3,RoundTable;4,Topic + ID_Name_Sign = re.search(r'(?<=
).*?(?=
)',ContentText).group(0) + InfoDict['IDLogoAdress'] = re.search(r'''(?<=src=")http://pic\d\.zhimg\.com/[_\w]{11}\.jpg(?="class="zm-profile-header-img zg-avatar-big zm-avatar-editor-preview")''',ContentText).group(0)#更新页面结构了我去 + InfoDict['ID'] = rTC(re.search(r'(?<=href="/people/)[^"]*',ID_Name_Sign).group(0)) + try: + InfoDict['Sign'] = rTC(re.search(r'(?<=)',ID_Name_Sign).group(0)) + except AttributeError: + InfoDict['Sign'] = '' + InfoDict['Name'] = rTC(re.search(r'(?<=">).*?(?=)',ID_Name_Sign).group(0)) + ################################## + Ask_Answer_Pst_CoE = re.findall(r'(?<=).*?(?=)',ContentText) + InfoDict['Ask'] = Ask_Answer_Pst_CoE[0] + InfoDict['Answer'] = Ask_Answer_Pst_CoE[1] + InfoDict['Post'] = Ask_Answer_Pst_CoE[2] + InfoDict['Collect'] = Ask_Answer_Pst_CoE[3] + InfoDict['Edit'] = Ask_Answer_Pst_CoE[4] + ################################## + + InfoDict['Agree'] = re.search(r'(?<=).*?(?=)',ContentText).group(0) + InfoDict['Thanks'] = re.search(r'(?<=).*?(?=)',ContentText).group(0) + ################################## + Followee_er = re.findall(r'(?<=
).*?(?=
)',ContentText).group(0) + InfoDict['AuthorName'] = rTC(re.search(r'(?<=">).*?(?=)',AuthorInfoStr).group(0)) + InfoDict['AuthorID'] = re.search(r'(?<=).*',AuthorInfoStr).group(0)) + except AttributeError: + InfoDict['AuthorSign'] = '' + try : + InfoDict['FollowerCount'] = re.search(r'(?<=
)\d*?(?=)',ContentText).group(0) + except AttributeError: + InfoDict['FollowerCount'] = 0#私密收藏夹没有关注数 + if Flag==3:#圆桌 + InfoDict['TableID'] = Target + Title_LogoAddress = re.search(r'(?<=

).*?(?=

)',ContentText).group(0) + + InfoDict['Title'] = rTC(re.search(r'(?<=).*(?=)',Title_LogoAddress).group(0)) + InfoDict['Adress'] = re.search(r'(?<=).*?(?=
)',ContentText).group(0)) + if Flag==4:#Topic + InfoDict['TopicID'] = Target + InfoDict['Title'] = rTC(re.search(r'(?<=).*?(?=)',ContentText).group(0)[:-12]) + + InfoDict['Adress'] = re.search(r'(?<=http://www.zhihu.com).*?(?=">)',ContentText).group(0)#/topic/19793502 + Buf = re.search(r'(?<=)',Buf).group(0) + try : + InfoDict['Description'] = rTC(re.search(r'(?<=
).*?(?=
)',ContentText).group(0)) #正常模式 + except AttributeError: + InfoDict['Description'] = rTC(re.search(r'(?<=
).*?(?=
)',ContentText).group(0)) #话题描述不可编辑 + print u'首页信息读取成功' + return InfoDict + +def CreateWorkListDict(PostHeader,TargetFlag,Target):#输入http头、目标代码,目标名,返回首页信息字典与待抓取Request字典#Pass + if TargetFlag==1: + url = 'http://www.zhihu.com/people/'+Target+'/answers?page=' + else: + if TargetFlag==2: + url = 'http://www.zhihu.com/collection/'+Target+'?page=' + else: + if TargetFlag==3:#特殊处理 + #url = 'http://www.zhihu.com/roundtable/'+Target+'/answers' + #InfoDict = CatchFrontInfo(k,TargetFlag) + #算了不做知乎圆桌了,麻烦 + return + else: + if TargetFlag==4: + url = 'http://www.zhihu.com/topic/'+Target+'/top-answers?page='#话题功能尚未测试 + else: + ErrorReturn(u'输入内容有误,创建待读取列表失败,在输入中提取到的内容为:\n{}\n,错误代码:{}\n'.format(Target,TargetFlag)) + Request = urllib2.Request(headers=PostHeader,url=url) + k = '' + Times = 0 + while k=='' and Times<10: + print u'正在打开答案首页',url + k = OpenUrl(Request).decode(encoding='utf-8',errors='ignore')#文本内容必须要经过编码,否则会导致搜索时出现故障 + if k=='': + print u'第{}/10次尝试打开答案首页失败,1秒后再次打开'.format(Times+1) + time.sleep(1) + Times+=1 + if k == '': + ErrorReturn(u'打开答案首页失败,请检查网络连接\n打开失败的网址为'+url) + k = k.replace('\n','').replace('\r','') + InfoDict = CatchFrontInfo(k,TargetFlag,Target) + MaxPage = FetchMaxAnswerPageNum(k) + RequestDict = {} + for No in range(MaxPage):#从0开始,不要干从1开始这种反直觉的事 + RequestDict[No] = [urllib2.Request(url=url+str(No+1),headers=PostHeader),False] + return InfoDict,RequestDict + +def returnIndexList(cursor=None,Target='',Flag=0,RequestDict={}):#Pass + print u'读取答案成功,正在生成答案索引' + Index = [] + if Flag==1: + for t in cursor.execute('select Questionhref from AnswerInfoTable where ID=? order by AgreeCount desc',(Target,)): + Index.append(t[0]) + else: + if Flag==2: + for t in cursor.execute('select CollectionIndex.Questionhref from CollectionIndex,AnswerInfoTable where CollectionIndex.CollectionID=? 
and CollectionIndex.Questionhref=AnswerInfoTable.Questionhref order by AnswerInfoTable.AgreeCount desc',(Target,)): + Index.append(t[0]) + else: + for t in RequestDict: + try: + for i in RequestDict[t][0]: + Index.append(i) + except TypeError:#当抓取不成功时貌似不会弹AttributeError,所以换成直接检测TypeError + pass + + print u'答案索引生成完毕,共有{}条答案链接'.format(len(Index)) + return Index +def SaveToDB(cursor=None,NeedToSaveDict={},primarykey='',TableName=''):#Pass + rowcount = cursor.execute('select count({}) from {} where {} = ?'.format(primarykey,TableName,primarykey),(NeedToSaveDict[primarykey],)).fetchone()[0] + SQL1 = 'insert into '+TableName+' (' + SQL2 = ' ) values ( ' + SQLTuple= [] + sql1 = 'update '+TableName+' set ' + for t in NeedToSaveDict: + SQL1+=t+',' + SQL2+='?,' + SQLTuple.append(NeedToSaveDict[t]) + sql1+=t+'=?,' + if rowcount==0: + #insert + cursor.execute(SQL1[:-1]+SQL2[:-1]+')',tuple(SQLTuple)) + else: + #update + SQLTuple.append(NeedToSaveDict[primarykey]) + cursor.execute(sql1[:-1]+' where '+primarykey+'= ?',tuple(SQLTuple)) + + + + +def ZhihuHelp(): + CheckUpdate() + conn,cursor = returnConnCursor() + ErrorReportText(flag=False)#初始化错误报告文件 + Mkdir(u'./知乎答案集锦') + try: + ReadList = open("./ReadList.txt","r") + except IOError as e: + print e + ErrorReturn(u'貌似程序所在的目录里好像没有ReadList.txt这个文件,先手工新建一个在运行知乎助手吧') + ReSettingFlag=True + if os.path.isfile('setting.ini'): + try : + Setting() + except : + pass + else: + print u'检测到有设置文件,是否直接使用之前的设置?(帐号、密码、最大线程数)' + print u'直接点按回车使用之前设置,敲入任意字符后点按回车进行重新设置' + if raw_input()=='': + ReSettingFlag=False + if ReSettingFlag: + PostHeader = Login(cursor=cursor)# + MaxThread = 20 + print u'ZhihuHelp热身中。。。\n开始设定最大允许并发线程数\n线程越多速度越快,但线程过多会导致知乎服务器故障无法打开网页读取答案失败,默认最大线程数为20\n请输入一个数字(1~50),回车确认' + MaxThread = setMaxThread() + Setting(ReadFlag=False,MaxThread=str(MaxThread)) + else: + ID,Password,MaxThread = Setting() + print u'配置信息读取完毕' + print u'登录帐号:{}\n登录密码:{}\n最大线程数:{}'.format(ID,Password,MaxThread) + PostHeader = Login(UserID=ID,UserPassword=Password,cursor=cursor)# + #***********又要重写了**************# + + #*************************# + + for TargetUrl in ReadList: + print u'开始识别目标网址' + TargetUrl = TargetUrl.replace('\n','').replace('\r','') + TargetFlag,Target = ChooseTarget(TargetUrl) + if TargetFlag==0: + print u'识别目标网址失败,原网址:',TargetUrl,u'识别结果:',Target + continue + try : + InfoDict,RequestDict= CreateWorkListDict(PostHeader=PostHeader,TargetFlag=TargetFlag,Target=Target) + except IOError as e: + print e + ErrorReportText(Info=u'读取用户信息出错\t:\t'+str(e)) + continue + except ValueError as e : + print e + print u'404网页错误或服务器拒绝访问\nPS:话说那个链接是私人收藏夹么?下载私人收藏夹需要用自己的帐号登陆知乎助手才行。' + ErrorReportText(Info=u'读取用户信息出错\t:\t'+str(e)) + continue + print u'开始抓取答案' + ThreadWorker(cursor=cursor,MaxThread=MaxThread,RequestDict=RequestDict,Flag=TargetFlag) + if TargetFlag==2: + SaveCollectionIndexIntoDB(RequestDict=RequestDict,CollectionID=Target,cursor=cursor) + conn.commit() + IndexList = returnIndexList(cursor=cursor,Target=Target,Flag=TargetFlag,RequestDict=RequestDict) + #将IndexList存在数据库中,方便制作电子书 + SaveToDBDict={} + SaveToDBDict['Var'] = TargetUrl + SaveToDBDict['Pickle']= pickle.dumps(IndexList) + SaveToDB(cursor=cursor,NeedToSaveDict=SaveToDBDict,primarykey='Var',TableName='VarPickle') + conn.commit() + #直接储存InfoDict + SaveToDBDict={} + SaveToDBDict['Var'] = TargetUrl+'InfoDict' + SaveToDBDict['Pickle']= pickle.dumps(InfoDict) + SaveToDB(cursor=cursor,NeedToSaveDict=SaveToDBDict,primarykey='Var',TableName='VarPickle') + conn.commit() + + print u'所有链接抓取完毕,久等了~' + print u'开始生成电子书' + 
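# EpubBuilder, called just below, assembles an unpacked book tree (mimetype,
# META-INF/container.xml, OEBPS/...) which ZipToEpub then packs. The one hard
# EPUB rule it honours is that the archive must begin with an uncompressed
# entry named "mimetype" containing "application/epub+zip". A minimal,
# self-contained sketch of that packing step for Python 3; the directory name
# and the placeholder OPF are illustrative only, not the original templates.
import os
import zipfile

CONTAINER_XML = """<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles>
    <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
  </rootfiles>
</container>
"""

def pack_epub(book_dir, epub_path):
    """Zip an unpacked EPUB tree, writing the mimetype entry first and uncompressed."""
    with zipfile.ZipFile(epub_path, "w") as epub:
        epub.writestr("mimetype", "application/epub+zip",
                      compress_type=zipfile.ZIP_STORED)  # must be the first entry
        for root, _dirs, files in os.walk(book_dir):
            for name in files:
                full_path = os.path.join(root, name)
                arc_name = os.path.relpath(full_path, book_dir)
                if arc_name == "mimetype":
                    continue  # already written above
                epub.write(full_path, arc_name, compress_type=zipfile.ZIP_DEFLATED)

if __name__ == "__main__":
    os.makedirs("demo_book/META-INF", exist_ok=True)
    os.makedirs("demo_book/OEBPS", exist_ok=True)
    with open("demo_book/META-INF/container.xml", "w") as f:
        f.write(CONTAINER_XML)
    with open("demo_book/OEBPS/content.opf", "w") as f:
        f.write("<!-- placeholder OPF; the real one is produced by CreateOPF -->")
    pack_epub("demo_book", "demo.epub")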
EpubBuilder(MaxThread) + print u'恭喜,所有电子书制作完毕' + print u'点按回车退出' + raw_input() +ZhihuHelp() diff --git "a/\346\272\220\344\273\243\347\240\201/tags" "b/\346\272\220\344\273\243\347\240\201/tags" new file mode 100755 index 0000000..36cd02a --- /dev/null +++ "b/\346\272\220\344\273\243\347\240\201/tags" @@ -0,0 +1,138 @@ +!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/ +!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/ +!_TAG_PROGRAM_AUTHOR Darren Hiebert /dhiebert@users.sourceforge.net/ +!_TAG_PROGRAM_NAME Exuberant Ctags // +!_TAG_PROGRAM_URL http://ctags.sourceforge.net /official site/ +!_TAG_PROGRAM_VERSION 5.9~svn20110310 // +AppendDictIntoDataBase ZhihuHelp.py /^def AppendDictIntoDataBase(cursor=None,Dict={}) : #假定已有数据库#PassTag$/;" f access:public +AppendDictIntoDataBase 软件说明.py /^def AppendDictIntoDataBase(cursor=None,Dict={}) $/;" f access:public +CatchFrontInfo ZhihuHelp.py /^def CatchFrontInfo(ContentText='',Flag=0,Target=''):$/;" f access:public +CatchFrontInfo 软件说明.py /^def CatchFrontInfo(ContentText='',Flag=0,Target=''):$/;" f access:public +CheckImgFileExist ZhiHuEpub.py /^def CheckImgFileExist(CheckList=[],ErrorList=[]):#PassTag$/;" f access:public +CheckImgFileExist 软件说明.py /^def CheckImgFileExist(CheckList=[],ErrorList=[]):$/;" f access:public +CheckUpdate ZhihuHelp.py /^def CheckUpdate():#检查更新,强制更新#newCommitTag$/;" f access:public +CheckUpdate 软件说明.py /^def CheckUpdate():$/;" f access:public +ChooseTarget ZhiHuEpub.py /^def ChooseTarget(url=''):#选择#Pass$/;" f access:public +ChooseTarget ZhiHuEpub_Zhuanlan.py /^def ChooseTarget(url=''):#选择$/;" f access:public +ChooseTarget 软件说明.py /^def ChooseTarget(url=''):#选择$/;" f access:public +CopyFile ZhiHuEpub.py /^def CopyFile(root='',TargetFile='',flag=True):$/;" f access:public +CreateContainer_XML ZhiHuEpub.py /^def CreateContainer_XML():#PassTag$/;" f access:public +CreateContainer_XML 软件说明.py /^def CreateContainer_XML():$/;" f access:public +CreateMimeType ZhiHuEpub.py /^def CreateMimeType():#PassTag$/;" f access:public +CreateMimeType 软件说明.py /^def CreateMimeType():$/;" f access:public +CreateNCX ZhiHuEpub.py /^def CreateNCX(NCXInfoDict={},Ncx=''):#PassTag$/;" f access:public +CreateNCX 软件说明.py /^def CreateNCX(NCXInfoDict={},Ncx=''):$/;" f access:public +CreateOPF ZhiHuEpub.py /^def CreateOPF(OPFInfoDict={},Mainfest='',Spine=''):#生成文件函数均假定当前目录为电子书根目录#PassTag$/;" f access:public +CreateOPF 软件说明.py /^def CreateOPF(OPFInfoDict={},Mainfest='',Spine='')$/;" f access:public +CreateWorkListDict ZhihuHelp.py /^def CreateWorkListDict(PostHeader,TargetFlag,Target):#输入http头、目标代码,目标名,返回首页信息字典与待抓取Request字典#Pass$/;" f access:public +CreateWorkListDict 软件说明.py /^def CreateWorkListDict(PostHeader,TargetFlag,Target):$/;" f access:public +DealAnswerDict ZhiHuEpub.py /^def DealAnswerDict(cursor=None,AnswerDict={},ImgList=[]):#必须是符合规定的Dict,规定附后$/;" f access:public +DealAnswerDict ZhiHuEpub_Zhuanlan.py /^def DealAnswerDict(JsonDict=[],ImgList=[],JsonDictList=[]):#必须是符合规定的Dict,规定附后$/;" f access:public +DealAnswerDict 软件说明.py /^def DealAnswerDict(cursor=None,AnswerDict={},ImgList=[]):#必须是符合规定的Dict,规定附后$/;" f access:public +DownloadImg ZhiHuEpub.py /^def DownloadImg(imghref='',ErrorList=[]):#下载失败时应报错或重试#文件已成功下载时也添加到ErrorList中#newCommitTag$/;" f access:public +DownloadImg 软件说明.py /^def DownloadImg(imghref='',ErrorList=[]):$/;" f access:public +DownloadPicWithThread ZhiHuEpub.py /^def DownloadPicWithThread(ImgList=[],MaxThread=20):#添加图片池功能#当图片下载完成时在ImgList中删除之#newCommitTag$/;" f access:public +DownloadPicWithThread 
软件说明.py /^def DownloadPicWithThread(ImgList=[],MaxThread=20):#添加图片池功能#当图片下载完成时在ImgList中删除之$/;" f access:public +ErrorReportText ZhiHuEpub.py /^def ErrorReportText(Info='',flag=True):$/;" f access:public +ErrorReturn ZhiHuEpub.py /^def ErrorReturn(ErrorInfo=""):#返回错误信息并退出,错误信息要用unicode编码$/;" f access:public +ErrorReturn ZhiHuEpub_Zhuanlan.py /^def ErrorReturn(ErrorInfo=""):#返回错误信息并退出,错误信息要用unicode编码$/;" f access:public +ErrorReturn 软件说明.py /^def ErrorReturn(ErrorInfo=""):#返回错误信息并退出,错误信息要用unicode编码$/;" f access:public +FetchMaxAnswerPageNum ZhihuHelp.py /^def FetchMaxAnswerPageNum(Content=""):#简单搜索比正则更快#OKTag$/;" f access:public +FetchMaxAnswerPageNum 软件说明.py /^def FetchMaxAnswerPageNum(Content=""):$/;" f access:public +File wahaha.py /^ File = open(title+'.htm','w')$/;" v +Help_ReadAnswer ZhihuHelp.py /^ def Help_ReadAnswer(t="",flag=True):$/;" f function:ReadAnswer access:public +Help_ZipToEpub ZhiHuEpub.py /^ def Help_ZipToEpub(Dir='.'):$/;" f function:ZipToEpub access:public +InputUserNameandPassword ZhihuHelp.py /^def InputUserNameandPassword():#UnitTest$/;" f access:public +InputUserNameandPassword 软件说明.py /^def InputUserNameandPassword():$/;" f access:public +List ZhihuHelp.py /^ List = ('2014-05-26', '_xsrf=9747077ec0374d469c91d06f4bf78c4d; q_c1=a5702f2ffc0344ae91e9efc0874012a8|1401117498000|1394290295000; q_c0="NTc1Mjk3OTkxMmM1NzU1N2MzZGQ5ZTMzMzRmNWVlMDR8MW9xU3hPdDF4U29BQlc4Qg==|1401117516|4bccb71dbbdd69c36ee800ef20586a6060ab8559";')#黄中华的cookie$/;" v +List ZhihuHelp.py /^ List = pickle.loads(cursor.execute("select Pickle from VarPickle where Var='PostHeader'").fetchone()[0])#这种错误。。。真难发现啊$/;" v +Login ZhihuHelp.py /^def Login(cursor=None,UserID='mengqingxue2014@qq.com',UserPassword='131724qingxue'):#newCommitTag$/;" f access:public +Login 软件说明.py /^def Login(cursor=None,UserID='mengqingxue2014@qq.com',UserPassword='131724qingxue'):$/;" f access:public +LoginData ZhihuHelp.py /^ LoginData = urllib.quote('{0}&email={1}&password={2}&rememberme=y'.format(xsrf,UserID,UserPassword),safe='=&')$/;" v +MakeInfoDict ZhiHuEpub.py /^def MakeInfoDict(InfoDict={},TargetFlag=0):$/;" f access:public +MakeInfoDict ZhiHuEpub_Zhuanlan.py /^def MakeInfoDict(ColumnInfoDict={}):$/;" f access:public +MakeInfoDict 软件说明.py /^def MakeInfoDict(InfoDict={},TargetFlag=0):$/;" f access:public +MaxThread ZhiHuEpub_Zhuanlan.py /^MaxThread = setMaxThread()$/;" v +MaxTryTime ZhihuHelp.py /^ MaxTryTime = 0#最多重复三次,三次后自动切换为使用旧有cookie进行登录$/;" v +MkUrlRequestForOpenUrl test_ZhihuHelp.py /^def MkUrlRequestForOpenUrl(url=''):$/;" f access:public +Mkdir ZhiHuEpub.py /^def Mkdir(DirName=u''):#PassTag$/;" f access:public +Mkdir 软件说明.py /^def Mkdir(DirName=u''):$/;" f access:public +NewHeader ZhihuHelp.py /^ NewHeader = (str(datetime.date.fromtimestamp(time.time()).strftime('%Y-%m-%d')),header['Cookie'])#Time和datetime模块需要导入 $/;" v +OldPostHeader ZhihuHelp.py /^def OldPostHeader(cursor=None):#可以加一个网络更新cookie的功能#Pass$/;" f access:public +OldPostHeader 软件说明.py /^def OldPostHeader(cursor=None)$/;" f access:public +OpenHttpPage 软件说明.py /^def OpenHttpPage(url=''):#打开网页,负责下载图片或者打开json列表,只尝试一次,失败时返回空字符串,错误信息中包含未打开网址。#$/;" f access:public +OpenUrl ZhiHuEpub.py /^def OpenUrl(Request):#打开网页,只尝试一次,失败时返回空字符串,错误信息中包含未打开网址。话说字符串分割对空列表还有效否?#OKTag$/;" f access:public +OpenUrl ZhiHuEpub_Zhuanlan.py /^def OpenUrl(url=""):$/;" f access:public +OpenUrl 软件说明.py /^def OpenUrl(Request):#打开网页,只尝试一次,失败时返回空字符串,错误信息中包含未打开网址。话说字符串分割对空列表还有效否?$/;" f access:public +PixName ZhiHuEpub.py /^def PixName(t):#PassTag$/;" f access:public +PixName 软件说明.py /^def 
PixName(t):$/;" f access:public +PostHeader wahaha.py /^PostHeader = OldPostHeader()$/;" v +PostInfo ZhihuHelp.py /^ PostInfo = json.loads(buf_read)$/;" v +PrintDict ZhiHuEpub.py /^def PrintDict(Dict={},Front=''):$/;" f access:public +PrintDict 软件说明.py /^def PrintDict(Dict={}):$/;" f access:public +PrintInOneLine ZhiHuEpub.py /^def PrintInOneLine(text=''):$/;" f access:public +PrintInOneLine 软件说明.py /^def PrintInOneLine(text=''):$/;" f access:public +ReadAnswer ZhihuHelp.py /^def ReadAnswer(ReDict,html_parser,LastDict,text="",Flag=1):#UnitTest#newCommitTag$/;" f access:public +ReadAnswer 软件说明.py /^def ReadAnswer(ReDict,html_parser,LastDict,text="",Flag=1):$/;" f access:public +SaveCollectionIndexIntoDB ZhihuHelp.py /^def SaveCollectionIndexIntoDB(RequestDict={},CollectionID=0,cursor=None):#PassTag$/;" f access:public +SaveCollectionIndexIntoDB 软件说明.py /^def SaveCollectionIndexIntoDB(RequestDict={},CollectionID=0,cursor=None):$/;" f access:public +SaveDict ZhihuHelp.py /^ SaveDict = {}$/;" v +SaveToDB ZhihuHelp.py /^def SaveToDB(cursor=None,NeedToSaveDict={},primarykey='',TableName=''):#Pass$/;" f access:public +SaveToDB 软件说明.py /^def SaveToDB(cursor=None,NeedToSaveDict={},primarykey='',TableName=''):$/;" f access:public +TestCase test_ZhihuHelp.py /^class TestCase(unittest.TestCase):$/;" c inherits:unittest.TestCase +ThreadLiveDetect ZhiHuEpub.py /^def ThreadLiveDetect(ThreadList=[]):$/;" f access:public +ThreadLiveDetect 软件说明.py /^def ThreadLiveDetect(ThreadList=[]):$/;" f access:public +ThreadWorker ZhihuHelp.py /^def ThreadWorker(cursor=None,MaxThread=200,RequestDict={},Flag=1):#newCommitTag$/;" f access:public +ThreadWorker 软件说明.py /^def ThreadWorker(cursor=None,ErrorTextDict={},MaxThread=200,RequestDict={},Flag=1)$/;" f access:public +WorkForFetchUrl ZhihuHelp.py /^def WorkForFetchUrl(ReDict={},html_parser=None,RequestDict={},Page=0,AnswerDictList=[],Flag=1):#抓取回答链接#注意,Page是字符串#Pass$/;" f access:public +WorkForFetchUrl 软件说明.py /^def WorkForFetchUrl(ErrorTextDict={},ReDict={},html_parser=None,RequestDict={},Page=0,AnswerDictList=[],Flag=1):#抓取回答链接#注意,Page是字符串$/;" f access:public +WriteHtmlFile 软件说明.py /^def WriteHtmlFile(cursor=None,IndexList=[],InfoDict={},TargetFlag=0):$/;" f access:public +ZhihuEpub ZhiHuEpub.py /^def ZhihuEpub(MaxThread=20):$/;" f access:public +ZhihuFrontPage ZhihuHelp.py /^ ZhihuFrontPage=urllib2.urlopen(u"http:\/\/www.zhihu.com")#这里也可能出错#初次打开zhihu.com,获取xsrf信息$/;" v +ZhihuHelp ZhihuHelp.py /^def ZhihuHelp():$/;" f access:public +ZhihuHelp_Epub ZhiHuEpub_Zhuanlan.py /^def ZhihuHelp_Epub(MaxThread=20):$/;" f access:public +ZipToEpub ZhiHuEpub.py /^def ZipToEpub(EpubName='a.epub'):#newCommitTag$/;" f access:public +ZipToEpub 软件说明.py /^def ZipToEpub(EpubName='a.epub'):$/;" f access:public +buf ZhihuHelp.py /^ buf = urllib2.urlopen(request)$/;" v +buf_read ZhihuHelp.py /^ buf_read = buf.read()#为什么只能读取一次???#info可以读取多次$/;" v +closeimg ZhiHuEpub.py /^def closeimg(text='',ImgList=[]):#PassTag$/;" f access:public +closeimg 软件说明.py /^def closeimg(text='',ImgList=[]):$/;" f access:public +content wahaha.py /^ content = OpenUrl(urllib2.Request(headers=PostHeader,url=t.replace('\\r','').replace('\\n','')))$/;" v +diff ZhihuHelp.py /^ diff = 30- (today - recordtime).days$/;" v +f wahaha.py /^f = open('ReadList.txt','r')$/;" v +fixPic ZhiHuEpub.py /^def fixPic(t='',ImgList=[]):#PassTag$/;" f access:public +fixPic 软件说明.py /^def fixPic(t='',ImgList=[]):$/;" f access:public +qc_0 ZhihuHelp.py /^ qc_0 = ''$/;" v +qc_0 ZhihuHelp.py /^ qc_0 = 
re.search(r'(q_c0=[^;]*)',buf.info()['set-cookie']).group(0)$/;" v +qc_1 ZhihuHelp.py /^ qc_1 = ''$/;" v +qc_1 ZhihuHelp.py /^ qc_1 = re.search(r'(q_c1=[^;]*)',buf.info()['set-cookie']).group(0)$/;" v +recordtime ZhihuHelp.py /^ recordtime = datetime.datetime.strptime(List[0],'%Y-%m-%d').date()$/;" v +removeAttibute ZhiHuEpub.py /^def removeAttibute(text='',AttList=[]):#PassTag$/;" f access:public +removeAttibute 软件说明.py /^def removeAttibute(text='',AttList=[]):$/;" f access:public +removeTag ZhiHuEpub.py /^def removeTag(text='',tagname=[]):#NonUseTag$/;" f access:public +removeTag 软件说明.py /^def removeTag(text='',tagname=[]):$/;" f access:public +request ZhihuHelp.py /^ request = urllib2.Request(url='http:\/\/www.zhihu.com\/login',data=LoginData,headers=header)$/;" v +returnConnCursor ZhihuHelp.py /^def returnConnCursor():#Pass$/;" f access:public +returnConnCursor 软件说明.py /^def returnConnCursor():$/;" f access:public +returnCursor ZhiHuEpub.py /^def returnCursor():#PassTag$/;" f access:public +returnCursor 软件说明.py /^def returnCursor():$/;" f access:public +returnHtml_FrontPage 软件说明.py /^def returnHtml_FrontPage(cursor=None,Flag=0,InfoDict={}):#兼职把Info存到数据库里$/;" f access:public +returnIndexList ZhihuHelp.py /^def returnIndexList(cursor=None,Target='',Flag=0,RequestDict={}):#Pass$/;" f access:public +returnIndexList 软件说明.py /^def returnIndexList(cursor=None,Target='',Flag=0,RequestDict={}):$/;" f access:public +returnMarkDownCssStyle MarkDownCssStyle.py /^def returnMarkDownCssStyle():$/;" f access:public +returnReDict ZhihuHelp.py /^def returnReDict():#返回编译好的正则字典#Pass$/;" f access:public +returnReDict 软件说明.py /^def returnReDict():#返回编译好的正则字典$/;" f access:public +returnTagContent ZhiHuEpub.py /^def returnTagContent(text='',tagname=''):#NonUseTag$/;" f access:public +returnTagContent 软件说明.py /^def returnTagContent(text='',tagname=''):$/;" f access:public +rowcount ZhihuHelp.py /^ rowcount = cursor.execute('select count(Pickle) from VarPickle where Var="PostHeader"').fetchone()[0] $/;" v +runner test_ZhihuHelp.py /^runner = unittest.TextTestRunner()$/;" v +setMaxThread ZhiHuEpub.py /^def setMaxThread():$/;" f access:public +setMaxThread ZhiHuEpub_Zhuanlan.py /^def setMaxThread():$/;" f access:public +setMaxThread 软件说明.py /^def setMaxThread():$/;" f access:public +setUp test_ZhihuHelp.py /^ def setUp(self):$/;" m class:TestCase access:public +suite test_ZhihuHelp.py /^def suite():$/;" f access:public +tearDown test_ZhihuHelp.py /^ def tearDown(self):$/;" m class:TestCase access:public +testChooseTarget test_ZhihuHelp.py /^ def testChooseTarget(self):$/;" m class:TestCase access:public +testFetchMaxAnswerPageNum test_ZhihuHelp.py /^ def testFetchMaxAnswerPageNum(self):$/;" m class:TestCase access:public +title wahaha.py /^ title = re.search(r'*?<\/title>',content.replace('\\r','').replace('\\n','').replace(' ','')).group(0)$/;" v +today ZhihuHelp.py /^ today = datetime.date.today()$/;" v diff --git "a/\346\272\220\344\273\243\347\240\201/test_ZhihuHelp.py" "b/\346\272\220\344\273\243\347\240\201/test_ZhihuHelp.py" new file mode 100755 index 0000000..37083b3 --- /dev/null +++ "b/\346\272\220\344\273\243\347\240\201/test_ZhihuHelp.py" @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- +import unittest +import urllib2 +def MkUrlRequestForOpenUrl(url=''): + header = { +'Accept' : '*/*' +,'Cookie':'q_c1=d55d91ee99a1484ea45c523d43ad3cc4|1399174529000|1396527477000; _xsrf=d76ccddd6631420787df7241954e0f76; c_c=ff0a1f30d3a211e3ba215254291c3363; 
q_c0="NTc1Mjk3OTkxMmM1NzU1N2MzZGQ5ZTMzMzRmNWVlMDR8MW9xU3hPdDF4U29BQlc4Qg==|1399218282|574021a9bbda221cd7144475ca05ca6a1b489e59";' +,'Accept-Encoding' :'gzip,deflate,sdch' +,'Accept-Language' :'zh,zh-CN;q=0.8,en-GB;q=0.6,en;q=0.4' +,'Connection' :'keep-alive' +,'Host' :'www.zhihu.com' +,'User-Agent':'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36' +} + return urllib2.Request(headers=header,url=url) + +from ZhihuHelp import FetchMaxAnswerPageNum,OpenUrl,ChooseTarget#ZhihuHelp中不能有主函数,否则会一开始就直接运行 + +class TestCase(unittest.TestCase): + def setUp(self): + self.test = FetchMaxAnswerPageNum() + def tearDown(self): + self.test = None + def testFetchMaxAnswerPageNum(self): + for t,number in [('http://www.zhihu.com/people/fei-ding-ci',1),#用户 + ('http://www.zhihu.com/people/mollymai-75/answers',4), + ('http://www.zhihu.com/people/Eno-Bea/answers',5), + ('http://www.zhihu.com/topic/19551147/top-answers',50), + + ('http://www.zhihu.com/topic/19563810/top-answers',1),#话题 + ('http://www.zhihu.com/topic/19587979/top-answers',4), + ('http://www.zhihu.com/topic/19551147/top-answers',50), + + ('http://www.zhihu.com/collection/36677470 ',1),#收藏 + ('http://www.zhihu.com/collection/20205640',3), + ('http://www.zhihu.com/collection/19762984',5), + ]: + content = OpenUrl(MkUrlRequestForOpenUrl(t)) + value = FetchMaxAnswerPageNum(content) + self.assertEqual(value,number) + def testChooseTarget(self): + for url,Return in [ + ('http://www.zhihu.com/people/yao-ze-yuan' ,(1,'yao-ze-yuan')), + ('http://www.zhihu.com/people/15.asd' ,(1,'15.asd')), + ('http://www.zhihu.com/people/-----' ,(1,'-----')), + ('http://www.zhihu.com/people/___/hello wperld' ,(1,'___')), + ('http://www.zhihu.com/people/asdqe/awe' ,(1,'asdqe')), + ('http://www.zhihu.com/people/pkpkpk' ,(1,'pkpkpk')), + ('http://www.zhihu.com/people/over' ,(1,'over')), + ('http://www.zhihu.com/collection/192196' ,(2,'192196')), + ('http://www.zhihu.com/collection/192196/123213',(2,'192196')), + ('http://www.zhihu.com/collection/192196/asd-r' ,(2,'192196')), + ('http://www.zhihu.com/collection/192196' ,(2,'192196')), + ('http://www.zhihu.com/topic/192196/19dsad' ,(4,'192196')), + ('http://www.zhihu.com/topic/192196 ' ,(4,'192196')), + + + + ]: + value = ChooseTarget(url) + self.assertEqual(value,Return) + + +def suite(): + suite = unittest.TestSuite() + suite.addTest(TestCase('testChooseTarget')) + return suite +runner = unittest.TextTestRunner() +runner.run(suite()) diff --git "a/\347\250\213\345\272\217\350\257\264\346\230\216/ReadMe.md" "b/\347\250\213\345\272\217\350\257\264\346\230\216/ReadMe.md" new file mode 100755 index 0000000..ff07737 --- /dev/null +++ "b/\347\250\213\345\272\217\350\257\264\346\230\216/ReadMe.md" @@ -0,0 +1,123 @@ +#程序说明 +##2014年3月31日23:51:40 +##def returnPostHeader(UserID='mengqingxue2014@qq.com',Password='131724qingxue',LoginSuccessFlag=True) +### 函数说明 + * 传入UserID与Password,返回一个携带Cookie的Http头字典 + * 默认登陆ID为@孟晴雪 + * 网络故障或登陆失败时打印错误信息,需要依次输入帐号密码重新登陆 +##class HtmlParser(HTMLParser.HTMLParser) +###类说明 + * 使用feed(HtmlContent)传入网页内容 + * 使用returnAnswerList()返回提取出的答案链接列表 + * 格式:['/question/21354/answer/15488',] +##def PictureReload(Content=''): +###函数说明 + * 知乎网页中的图片采用懒加载模式,处理后才能返回正确的图片链接 + * 传入网页内容,返回带图片链接的网页内容 +##def ReturnRealAnswerContent(text=''): +###函数说明 + * 传入目标网页 + * 返回答案内容 +##def ReadPersonInfo(k="") +###函数说名 + * 传入用户答案页内容,返回一个字典,字典内为提取出的用户信息 + * 字典内容: + * Dict['ID_Asks'] + * Dict['ID_Answers'] + * Dict['ID_Posts'] + * Dict['ID_Collections'] + * Dict['ID_Logs'] + * 
Dict['ID_Followees'] + * Dict['ID_Followers'] + * Dict['ID_Visit'] + * Dict['ID_ID'] + * Dict['ID_Name'] + * Dict['ID_Sign'] +##def ReadCollectionInfo(k="") +###函数说明 + * 传入收藏夹主页页面内容,返回一个字典,字典内为提取出的收藏夹信息 + * 字典内容 + * Dict['title'] + * Dict['AuthorID'] + * Dict['AuthorName'] + * Dict['followerCount'] +##def FetchMaxAnswerPageNum(Content="") +###函数说明 + * 输入收藏夹或用户答案首页内容,返回答案页面最大值 +##def ReadAnswer(k='',url="",ID="",Name="") +###函数说明 + * k为答案内容,由returnRealAnswerContent()返回。url为答案地址,ID、Name为答主ID与用户名,校验用,置空意为答案页面由收藏夹传入 + * 返回一个字典,字典内为提取出的答案信息 + * 字典内容 + * 说明:当用户为匿名用户时ID置为404nofFound!,Name=匿名用户 + * PS:404nouFound这个ID都能存在是要闹那样? + * Dict["ID"] + * Dict["Sign"] + * Dict["AgreeCount"] + * Dict["CollectionCount"] + * Dict["CommitCount"] + * Dict["QuestionID"] + * Dict["AnswerID"] + * Dict["UpdateTime"] + * Dict["QuestionTitle"] + * Dict["Questionhref"] + * Dict["AnswerContent"] + * Dict["UserName"] + * Dict["QuestionTitle"] + * Dict["Questionhref"] + * Dict["AnswerID"] + * Dict["QuestionID"] +##OpenUrl(Request) +###函数说明 + *传入一个网页Request,返回网页内容,报错返回空字符串,如若网页打开失败会自动重读,超时重读3次,400系列错误直接返回空字符串,其余错误重读10次 + *返回两个字典,Dict为文件信息,用于生成回答集锦的头部,RequestDict为待抓取答案页面字典 + *Request请求使用urllib2.Request()进行制作 +##WorkForFetchFrontPageInfo(ID='',Collect='',PostHeader={}) +###函数说明 + *传入ID或Collection序号,都为空值报错退出,均有值则只读取ID。需要传入一个PostHeader字典,由returnPostHeader给出 + *读取首页信息,返回待抓取答案链接的网页字典RequestDict,键为从0开始递增的数字,值为一个列表,第一项是Request,第二项为待读取标记,默认为False + *若网页打不开则抛出一个NameError,直接退出 +##WorkForFetchUrl(RequestDict={},Page=0) +###函数说明 + *抓取RequestDict[Page]中的答案链接,抓完后将待读取的列表存入RequestDict[Page][0]中,RequestDict[Page][1]置True +##WorkForGetAnswer(RequestDict={},Page=0,ID='',Name="") +###函数说明 + *抓取RequestDict[Page]上的答案,抓完后将答案Dict置入RequestDict[Page][0]中,RequestDict[Page][1]置True + *ID用于区别收藏夹与用户,ID为空即为读取的是收藏夹内的内容 +##WorkForSuitUrl(RequestDict={},PostHeader={}) +###函数说明 + *将WorkForFetchUrl()返回的RequestDict处理为适合WorkForGetAnswer()读取的格式 + *返回一个答案Request字典,键为数字升序,值为答案所在网页的Request + *答案Request按读取时的页面顺序排列,1为第一页第一条,2为第一页第二条,21为第二页第一条,22为第二页第二条,41为第三页第一条,63为第四页第三条e.g. 
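+ * A minimal sketch of the numbering rule the examples above imply, assuming 20 answers per listing page; the helper name `AnswerKey` and the constant `ANSWERS_PER_PAGE` are illustrative only and do not exist in the source:
+
+```python
+# -*- coding: utf-8 -*-
+# Sketch only: map a 1-based (Page, Item) pair to the key used in the
+# answer-Request dict described above, assuming 20 answers per page.
+ANSWERS_PER_PAGE = 20
+
+def AnswerKey(Page, Item):
+    return (Page - 1) * ANSWERS_PER_PAGE + Item
+
+assert AnswerKey(1, 1) == 1   # page 1, item 1
+assert AnswerKey(1, 2) == 2   # page 1, item 2
+assert AnswerKey(2, 1) == 21  # page 2, item 1
+assert AnswerKey(4, 3) == 63  # page 4, item 3
+```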
+##ThreadWorker_FetchUrl(MaxThread=5,RequestDict={}) +###函数说明 + *分线程抓取RequestDict中的全部答案链接,MaxThread为最大线程数 + *为任务调配函数,无返回值 +##ThreadWorker_GetAnswer(MaxThread=5,RequestDict={},PostHeader={},ID="",Name="") +###函数说明 + *类似ThreadWorker_FetchUrl() + *分线程抓取RequestDict中的全部答案内容,MaxThread为最大线程数 + *为任务调配函数,无返回值 +##CheckUpdate() +###函数说明 + *读取http://zhihuhelpbyyzy-zhihu.stor.sinaapp.com/ZhihuHelpUpdateTime.txt上的版本信息,发现新版本时打开url进行更新 +##ChooseTarget() +###函数说明 + *首屏 + *输入用户主页地址或收藏夹地址,返回 + *True,ID + *False,Collect + *识别失败直接退出 +##ShaoErBuYi(InfoDict={},IDFlag=True) +###函数说明 + *太长了,少儿不宜。。。 + *输入InfoDict和IDFlag, + *True为ID内容, + *False为收藏夹内容 + *返回根据InfoDict生成的Html文档头,内含MarkDown样式 +## WriteHtmlFile(Dict={},InfoDict={},IDFlag=True) +###函数说明 + *Dict为答案字典,InfoDict为用户信息字典,IDFlag=True为用户答案集锦,False为收藏夹 + *将答案写入到[用户名]的知乎回答集锦.html或[收藏夹名].html文件中 + *文件位置位于程序所在文件夹内 + diff --git "a/\347\250\213\345\272\217\350\257\264\346\230\216/\344\275\277\347\224\250\350\257\264\346\230\216.txt" "b/\347\250\213\345\272\217\350\257\264\346\230\216/\344\275\277\347\224\250\350\257\264\346\230\216.txt" new file mode 100755 index 0000000..8501986 --- /dev/null +++ "b/\347\250\213\345\272\217\350\257\264\346\230\216/\344\275\277\347\224\250\350\257\264\346\230\216.txt" @@ -0,0 +1,64 @@ + +���棺������������ɵ�����ʱ���Զ�������������ͼƬ�������������Ʒѵ�ͬѧ�뵽���������б����򡣣�����@yskin�������ղؼ�������ɵĵ�����һ��720M�����Ǹ���һ�¡������� + +������������������������������������������������������������������������ + +֪������ ʹ��˵��(�������û���/˽���ղؼ��빫���ղؼ�/���⾫����) + +��Ҫ���ص�֪���û���ҳ��ַ���ղؼ���ҳ��ַ���߻�����ҳ��ַ����ճ����ReadList.txt�У����棬Ȼ��˫������ZhihuHelp.exe������ͻ��Զ����û���/�ղؼ�/�����µľ��������ΪEpub��ʽ�ĵ������ڡ�֪���𰸼������ļ����� + +Epub��ʽ�ĵ����������ˢ�˶࿴ϵͳ�µ�Kindle���ֻ��࿴�Ķ��ͻ����������Ҳ���Խ�������ת��Ϊmobi��ʽ��ԭ��Kindle�Ϲۿ������Ƽ��� + +��ʼ����ǰ����Ҫ������һ������߳��������ֵԽ���ȡ���ٶ�Ҳ��Խ�죬���dz����޷���ȡ�𰸵Ŀ�����ҲԽ���߳�������ֵʱ����10M���绷���²��ԵĽ����������֮һ�Ĵ𰸶�ȡʧ�ܣ���Ĭ����20�����1�Ļ��������Ա�֤ÿ���ش���ץȡ��������ץȡ�ٶ��ץ@yolfilm�Ļش𼯽����Ҫ4000s������ + +------------------------------------------------------------------------ + +ר������ ʹ��˵����������֪��ר���� + +��Ҫ���ص�ר����ҳ��ַ����ճ����ReadList.txt�У�˫������ר������.exe���� + + +------------------------------------------------------------------------ + +���Mac/Linux�û���ʹ��˵�����Ʋ� +http://www.zhihu.com/question/22719537/answer/22733181 + +������������������������������������������������������������������������ + +������ +����������ݸ���ճ����ReadList.txt��Ϳ��Եõ�@yolfiml��@��ͭ��@����Լ����������ԡ�����Ц���Ҳ����Ѫ���Ĵ𰸼��� + + +http://www.zhihu.com/people/calliope +http://www.zhihu.com/people/yolfilm +http://www.zhihu.com/people/cai-tong +http://www.zhihu.com/collection/19619639 +http://www.zhihu.com/collection/19686512 + +����������Ӹ���ճ����ReadList.txt��Ϳ��Եõ�@yolfilm �����ġ��˼䡻ר����@������ ���ġ��⼧̸֮���ĵ����� + + +http://zhuanlan.zhihu.com/yolfilm +http://zhuanlan.zhihu.com/kuaijijixuanyi + +������������������������������������������������������������������������ + +Tips�� + +1������ͼƬ���ٶ��������ٵ�ԭ����ܺ���������һֱ�һ����أ������ڡ�������������ʱ��Դ��/��������_������������ʱ�ļ��С��ļ�������txt��ʽ������ʧ�ܵ�ͼƬ���Ӻϼ�������ֱ����Ѹ�����أ�Ȼ���������ɵ��ļ����ڡ�XXX_��֪���ش𼯽�/OEBPS/images���ļ����У������⵽�Ѿ�������ͼƬ�Ļ��Ͳ������ظ������� + +2����һ��ʵ���˴𰸵��������¹��ܣ�ֻ������޸Ĺ������»ش�Ĵ𰸣�Ȼ�󷵻�ȫ���𰸣�����֮������������ݿ⣬����ʵ�ֶ�֪���û��𰸵����ñ��棺�� + +3.��������������Դ�ļ��С����cover.jpg����ķ����գ������滻������jpgͼƬ + +4.��������ͬһ�����µĴ���һ�𣬰���ͬ����������䰴����ͬ��������֮����ͬ�����Ĵ�����ǰ�棬�� + 
+5��������ReadList.txt�е���ַ����ӡ�#����Ȼ��#������Լӱ�ע��������ڶ�ȡʱ�Զ����Ե���Щ��ע~ + +6.�����߳���������ÿ100kb/s��������ٿ�5���̣߳����������ٶ����100k/sʱ��5���̣߳�200k/s��10����500k/s��25�����Դ����� + +7.�����ø���ReadList.txt����ɶ��ͬѧ���Բο������û�������Ļش�http://zhi.hu/2xFz + +8.׼����ʼдͼ�ν��� + +9.��ӭ����bug���Ժ��޸�bug���ٶȾͿ�Ķ��ˣ�����ToDoһ���ᰡ~ \ No newline at end of file diff --git "a/\347\250\213\345\272\217\350\257\264\346\230\216/\346\233\264\346\226\260\345\216\206\345\217\262.txt" "b/\347\250\213\345\272\217\350\257\264\346\230\216/\346\233\264\346\226\260\345\216\206\345\217\262.txt" new file mode 100755 index 0000000..3ac7800 --- /dev/null +++ "b/\347\250\213\345\272\217\350\257\264\346\230\216/\346\233\264\346\226\260\345\216\206\345\217\262.txt" @@ -0,0 +1,87 @@ +1.6��˵���� +д��ʱ����̫�������˸��µ��Ķ��ˣ������� +˵˵�һ��ǵõĸ��°�~ +1.ȥ�������html�Ĺ��ܣ�ֱ�ӽ������ΪEpub +2.�����˼�ס���빦�ܣ�һ��������ɺ��Ժ�������ֱ��һ·�س����� +3.ʶ��ʧ�ܵ���ҳֱ���������δ�ɹ��򿪵�ҳ��.txt���У����Ҳ�����Ҫ�û��س�ȷ�ϣ���ʼ������֮��Ϳ����ó����Լ��������ˣ��� +4.��л@������ͬѧ�ľ����Ͷ��٣��⼸���æ�Ÿ���ҵ�ˣ����µıȽ����������ٴ���Ǹ������ +5.ȥ����EpubĿ¼���ļ�϶������������� +6.���⡢�ղؼк�ר������н��ܵĻ����ڷ�������ʾ���� +7.�޸���һ�����������³��������bug��bug����@���˵��� +8.�����˴�������������ʾģʽ����һ����Ŀ���ǰѴ���ѹ����1500������ +9.֪������ר�������Ϊ��ר�����֣������ĸ������Ľ��������㰡 +10.������ReadList.txt�е���ַ����ӡ�#����Ȼ��#������Լӱ�ע��������ڶ�ȡʱ�Զ����Ե���Щ��ע~ +11.http://zhi.hu/3pki������Ѿ������õĵ�����ϼ����Ƽ�����ǰ��ȥ�Ƕ����ң�֪�������������װ�~ +12.Ϊ���ܸ��õ����ش�ͼ�������Ұ�������ȴ�ʱ�����ó���15s�������������ȴ�ʱ�佫��ﵽ150s����Ҳ���������ӣ�������ʱ��ȴ���ʱ���Ƚϳ����� + + +1.5.1��˵���� +1.�޸��˽�mimetype�ļ�ѹ�����εĴ���bug����via@Francis Fung +2.�������趨����ͼƬ�߳���ģ�飬1.5������߳�ֱ�������50����ͼƬ��������ʧ�ܣ����ڿ��Ը�����������Զ����߳����� +3.������һ������ͼƬ��û�����ض�һ����Ϊ�����ص�һ��bug���������� + +������������������������������������������������ +1.5��˵��: +1.������һ��Epub_zhuanlan�ű����ڽ�ר������Ϊ������ +2.������һ��ͼƬ�أ����е�������ʱ�ļ��й���һ��ͼƬ�أ��ӿ�ͼƬ�����ٶȣ������֪����������ѹ�� +3.��л@Molly�� С���ľ������ٺ�~ +4.֪��ר�������ʹ����ڲ�״̬������ӿڵ�������ʧЧ�ĸ��ʷdz��������Ұ�ר�����ܶ����˳�������������鷳�ˣ������������ +5.���ڳ����Ѿ��dz��ȶ������ԣ�����~�Ժ����ʽ������֪��������~���� + +������������������������������������������������ +1.4��˵���� +1.�������ΪEpub�Ĺ��� +2.�����޷���ȡ˽���ղؼе�bug +3.���������в��ܴ�����bug +4.����������ͬ�����µĴ𰸶���һ�𣬹��ܽ���via@Henry + +������������������������������������������������ +1.3��˵���� +1.�����ϰѳ�����д��һ�顣���� +1.1���������¹��ܣ� +1.1.1����@Ī���ߵĽ��飬����׽���Ĵ𰸴����ڡ�֪���𰸼������ļ����У����ں������������� +1.1.2���Ա��滰���µľ����ش� +1.1.3�Զ������½cookie +1.2�����������޸� +1.2.1תΪʹ������ƥ����ҳ���� +1.2.2ֱ�����б��ж�ȡ�û��𰸣�����20�� +1.2.3�����˼�⡺��ֹת�ء����ܣ�����û���ѡ�ˡ���ֹת�ء������ֽ����ٶ�ȡ�ô� +1.2.4Ϊ�˱�֤��©�𰸣�תΪʹ�����ݿ��¼�û����ղؼ��ڵĴ��б��������Ͳ��ܼ�¼�ղؼ���𰸵�λ���ˣ������ڵ����ղؼ����û���ʱ���ᰴ����ͬ˳����������ǰ����ӵ��ղؼ��е�˳������ +1.2.5����ҳ�򲻿�ʱ���ڳ������������Ϣ����֮�������´򿪣��Ա���֪���������ͽ��ʹ�ʧ�ܵĸ��� +1.2.6python�ű���Mac��Linux������Ҳ����ʵ�ּ����� + +������������������������������������������������ +1.2��˵��: +�Գ��������һЩ΢�� +#������һ��һֱ��ʾ���³����bug +#����@Ī���ߵĽ��飬��ƥ����ַ���ɹ�ʱ������ֱ���˳������������ʾ�����û�����س�ȷ�Ϻ�������� + +������������������������������������������������ +1.1��˵���� +���������ݿ��ɱ�����Ӵ�©����·�ˡ����� 
+��ʵ���ǻ�©�𣬵��ǣ�ÿ�����У����ݿⶼ������гɹ���ȡ�Ĵ𰸱���������������ʱ��ֻ��ȡ����û��ȡ�ɹ��Ĵ𰸡���Ҳ����˵�����ų������б��������ӣ�©���ʽ�����ָ�����½�������©������30%�Ļ�����һ����30%���ڶ��ξ�ֻ��9%��0.3*0.3�����������ٶ�ȡ��©���ʾ�ֻ��2.7%��0.3*0.3*0.3����������©���ʣ������ǿ��Խ��ܵ�~ +����©���Ĵ����Ӷ��ڡ�δ�ɹ���ȡ��ҳ���б�.txt������Խ���ж�©����~ +�������ݿ�֮����ŵ��dz���ֻ�����������𰸣�ʵ���˴𰸵����ñ��棬ȱ�������д𰸶����������˴�������һ�̣�û������ʱ���IJ��������������������������ǰ���԰�ZhihuDateBase.dbɾ�������������ݿ����Ҳ������ݾͻ�������ץȡ���´��� +������һ��cookie����ץȡ���˻ش���߹����ղؼ�ģʽ�µ�½ʧ��ʱ�Զ��������cookieαװ��¼,��Ч��һ���£�zhihu.com��ģ�������������˽���ղؼ�ģʽ��ģʽѡ�����������2���� + +��л@������ �ľ�����@��� ���Ƽ���лл��ҵ��Ͽɣ�лл(��_��) + +��ӭ��zhi.hu/0Jo4�����۴����Ὠ��~�һ�Ŭ����~ + + +������������������������������������������������ +1.0��˵�� +��Ҫ��ȡ���û���ҳ��ַ���ղؼ���ҳ��ַճ����ReadList.txt�Ȼ�󱣴桢˫��ZhihuHelp��ʼ���о��� + +��ʼ����ǰ����Ҫ������һ������߳��������ֵԽ���ȡ���ٶ�Ҳ��Խ�죬���dz����޷���ȡ�𰸵Ŀ�����ҲԽ���߳�������ֵʱ����10M���绷���²��ԵĽ����������֮���Ĵ𰸶�ȡʧ�ܣ���Ĭ����20�����1�Ļ��������Ա�֤ÿ���ش���ץȡ��������ץȡ�ٶ��ץ@yolfilm�Ļش𼯽����Ҫ4000s������ + +��������WTFPLЭ�鷢��������ʱ360������������һ�£����ù�����Ҫ���͵��ֹ�����ַ�� + +���������ӵ�4��1�ţ���д������� + +1.0�棬�ų� + +����~ + + +������������������������������������������������������������������������ \ No newline at end of file diff --git "a/\347\250\213\345\272\217\350\257\264\346\230\216/\347\237\245\344\271\216\345\212\251\346\211\213\344\273\243\347\240\201\346\236\204\345\273\272\346\200\235\350\267\257.md" "b/\347\250\213\345\272\217\350\257\264\346\230\216/\347\237\245\344\271\216\345\212\251\346\211\213\344\273\243\347\240\201\346\236\204\345\273\272\346\200\235\350\267\257.md" new file mode 100644 index 0000000..54acc4c --- /dev/null +++ "b/\347\250\213\345\272\217\350\257\264\346\230\216/\347\237\245\344\271\216\345\212\251\346\211\213\344\273\243\347\240\201\346\236\204\345\273\272\346\200\235\350\267\257.md" @@ -0,0 +1,96 @@ +#知乎助手构建思路 +##前言 +目前代码行数总计已经超过了1000行,现在已经很难实现对全部代码的掌控,以至于在更新软件时要么重新构建,要么就是留下一堆bug。自认为自己是很懒的那种人,所以必须要强迫自己一下。 + +这次更新1.7版,每更新一个函数就会在这里进行一次记录,记录函数的功能、输入、输出,为以后更新留下方便 +##软件目标 +1. 制作指定用户的答案集锦 +2. 制作制定收藏夹的答案集锦 +3. 制作制定话题下的所有精华内容的集锦 +4. 制作制定专栏的集锦 +5. 制作自定义内容的书籍(自定义版的知乎周刊) + + * 内容包括不限于 + 1. 指定回答 + 2. 指定问题下按某些条件过滤后的回答 + 3. 指定专栏 + 4. 指定专栏文章 + 5. 指定用户、收藏夹内的回答 + * 且:书籍内容顺序可自定义 + +* 书籍附属特性: + + 1. 默认下载小图,对于指定问题可选择下载大图 + 2. 在多看环境下双击可点开图片查看 + 3. 可分章节,每章前可有引语 + 4. 用户头像圆边化 + +* 程序附属特性: + + 1. 准确的报错 + 2. 
代码解耦 +##程序模块目录 + * 设置模块 + * 读取并储存设定 + * 登陆模块 + * 目标链接识别 + * 内容抓取 + * 数据储存 + * 中间输出部分 + * 将读取结果以XML的格式输出出来 + * 生成目录 + * 软件将以生成目录为准生成最终版的电子书 + * 目录主要内容为章节、章节引言、内容顺序、高清图选项 + * 生成电子书 + * 图片下载 + * 目录生成 + * 合并压缩 + +##程序具体实现 + * 设置模块 + * 使用Qt辅助生成设置 + * 使用XML作为中间交换文件 + * XML格式 + * <root> + * <Book> + * <BookType> + * 若为用户、专栏、收藏、话题则不必进一步进行处理 + * <Title> + * 书名,可以自动生成 + * <BookPreviewCover> + * 略缩图 + * 指显示在多看首页上的图书预览图,真实封面要在BookFrontPage里设定 + * <BookFrontPage> + * 图书封页 + * <BookLogo> + * 封面图片 + * <BookIntroduce> + * 100字以内的描述 + * <BookDescripe> + * 图书引言页 + * 对书籍内容的描述 + * <Content> + * <Url> + * 地址 + * 自动识别类型,允许混排 + * 分情况设定 + * 收藏、话题没有FilterRule + * 用户、专栏只有一个Url地址 + * 自定义类型中可以有多个Url,每个Url可以有自己独立的Filter + * <FilterRule> + * 可以单独设置,也可以批量设置 + * <Agree> + * <above> + * 大于等于 + * <below> + * 小于等于 + * <Comment> + * <above> + * <below> + * <Login> + * <UserID> + * <UserName> + * 自动填充 + * 保存为默认值 + * 初版软件一次只制作一本电子书 + diff --git "a/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/88x31.png" "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/88x31.png" new file mode 100755 index 0000000..49f272f Binary files /dev/null and "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/88x31.png" differ diff --git "a/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/cover.jpg" "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/cover.jpg" new file mode 100755 index 0000000..d4ec930 Binary files /dev/null and "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/cover.jpg" differ diff --git "a/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/stylesheet.css" "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/stylesheet.css" new file mode 100755 index 0000000..f2987c8 --- /dev/null +++ "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/stylesheet.css" @@ -0,0 +1,336 @@ +/* GitHub stylesheet for MarkdownPad (http://markdownpad.com) */ + /* Author: Nicolas Hery - http://nicolashery.com */ + /* Version: b13fe65ca28d2e568c6ed5d7f06581183df8f2ff */ + /* Source: 
https://github.com/nicolahery/markdownpad-github */ + + /* RESET + =================================================================================================*/ + /*****个人测试***/ + + .zh-fav-head-title{ + font-size:18px;/*收藏夹标题大小*/; + } +.zm-item-title{ + color:#259/*知乎问题标题颜色*/; + font-size:18px; +} + +.zh-list-title .zm-editable-editor-input{font-size:18px;font-weight:700}/*设定列表名、编辑框的宽*/ + +.zg-bull{ + padding:0 3px; + color:#BBB; + display:inline-block; + font-family:Arial + }/*添加评论 修改记录 中间的点*/ +h3.zm-tiem-answer-author-info/*答主用户名与签名*/ + { + color:#222 + font-family: 'Helvetica Neue',Helvetica,Arial,Sans-serif; + font-size:13px; + } +strong { + font-weight: bold; + +} +a:link {text-decoration:none; color:#259 } +a:visited {text-decoration:none; color:#259 } +a:hover {text-decoration:underline; color:#259 } +a:active {text-decoration:underline; color:#259 } +.zm-item-rich-text +{ + font-size: 13px; + color:#222; +} + +.zm-item-comment-el{ + color:#999/*添加品论、关注问题等的颜色*/; + font-size:13px; +} +.answer-content +{ + postion:relative; + top:-25px; +} +.update{ + float:left; +} + /************/========================================================= + /********/ + + html, body, div, span, applet, object, iframe, h1, h2, h3, h4, h5, h6, p, blockquote, pre, a, abbr, acronym, address, big, cite, code, del, dfn, em, img, ins, kbd, q, s, samp, small, strike, strong, sub, sup, tt, var, b, u, i, center, dl, dt, dd, ol, ul, li, fieldset, form, label, legend, table, caption, tbody, tfoot, thead, tr, th, td, article, aside, canvas, details, embed, figure, figcaption, footer, header, hgroup, menu, nav, output, ruby, section, summary, time, mark, audio, video { + margin: 0; + padding: 0; + border: 0; + } + + /* BODY + =============================================================================*/ + + body { + font-family: Helvetica, arial, freesans, clean, sans-serif; + font-size: 14px; + line-height: 1.6; + color: #333; + background-color: #fff; + padding: 20px; + max-width: 960px; + margin: 0 auto; + } + + body>*:first-child { + margin-top: 0 !important; + } + + body>*:last-child { + margin-bottom: 0 !important; + } + + /* BLOCKS + =============================================================================*/ + + p, blockquote, ul, ol, dl, table, pre { + margin: 15px 0; + } + + /* HEADERS + =============================================================================*/ + + h1, h2, h3, h4, h5, h6 { + margin: 20px 0 10px; + padding: 0; + font-weight: bold; + -webkit-font-smoothing: antialiased; + } + + h1 tt, h1 code, h2 tt, h2 code, h3 tt, h3 code, h4 tt, h4 code, h5 tt, h5 code, h6 tt, h6 code { + font-size: inherit; + } + + h1 { + font-size: 28px; + color: #000; + } + + h2 { + font-size: 24px; + border-bottom: 1px solid #ccc; + color: #000; + } + + h3 { + font-size: 18px; + } + + h4 { + font-size: 16px; + } + + h5 { + font-size: 14px; + } + + h6 { + color: #777; + font-size: 14px; + } + + body>h2:first-child, body>h1:first-child, body>h1:first-child+h2, body>h3:first-child, body>h4:first-child, body>h5:first-child, body>h6:first-child { + margin-top: 0; + padding-top: 0; + } + + a:first-child h1, a:first-child h2, a:first-child h3, a:first-child h4, a:first-child h5, a:first-child h6 { + margin-top: 0; + padding-top: 0; + } + + h1+p, h2+p, h3+p, h4+p, h5+p, h6+p { + margin-top: 10px; + } + + /* LINKS + =============================================================================*/ + + a { + color: #4183C4; + text-decoration: none; + } + + a:hover { + text-decoration: underline; + } + + /* 
LISTS + =============================================================================*/ + + ul, ol { + padding-left: 30px; + } + + ul li > :first-child, + ol li > :first-child, + ul li ul:first-of-type, + ol li ol:first-of-type, + ul li ol:first-of-type, + ol li ul:first-of-type { + margin-top: 0px; + } + + ul ul, ul ol, ol ol, ol ul { + margin-bottom: 0; + } + + dl { + padding: 0; + } + + dl dt { + font-size: 14px; + font-weight: bold; + font-style: italic; + padding: 0; + margin: 15px 0 5px; + } + + dl dt:first-child { + padding: 0; + } + + dl dt>:first-child { + margin-top: 0px; + } + + dl dt>:last-child { + margin-bottom: 0px; + } + + dl dd { + margin: 0 0 15px; + padding: 0 15px; + } + + dl dd>:first-child { + margin-top: 0px; + } + + dl dd>:last-child { + margin-bottom: 0px; + } + + /* CODE + =============================================================================*/ + + pre, code, tt { + font-size: 12px; + font-family: Consolas, "Liberation Mono", Courier, monospace; + } + + code, tt { + margin: 0 0px; + padding: 0px 0px; + white-space: nowrap; + border: 1px solid #eaeaea; + background-color: #f8f8f8; + border-radius: 3px; + } + + pre>code { + margin: 0; + padding: 0; + white-space: pre; + border: none; + background: transparent; + } + + pre { + background-color: #f8f8f8; + border: 1px solid #ccc; + font-size: 13px; + line-height: 19px; + overflow: auto; + padding: 6px 10px; + border-radius: 3px; + } + + pre code, pre tt { + background-color: transparent; + border: none; + } + + kbd { + -moz-border-bottom-colors: none; + -moz-border-left-colors: none; + -moz-border-right-colors: none; + -moz-border-top-colors: none; + background-color: #DDDDDD; + background-image: linear-gradient(#F1F1F1, #DDDDDD); + background-repeat: repeat-x; + border-color: #DDDDDD #CCCCCC #CCCCCC #DDDDDD; + border-image: none; + border-radius: 2px 2px 2px 2px; + border-style: solid; + border-width: 1px; + font-family: "Helvetica Neue",Helvetica,Arial,sans-serif; + line-height: 10px; + padding: 1px 4px; + } + + /* QUOTES + =============================================================================*/ + + blockquote { + border-left: 4px solid #DDD; + padding: 0 15px; + color: #777; + } + + blockquote>:first-child { + margin-top: 0px; + } + + blockquote>:last-child { + margin-bottom: 0px; + } + + /* HORIZONTAL RULES + =============================================================================*/ + + hr { + clear: both; + margin: 15px 0; + height: 0px; + overflow: hidden; + border: none; + background: transparent; + border-bottom: 4px solid #ddd; + padding: 0; + } + + /* TABLES + =============================================================================*/ + + table th { + font-weight: bold; + } + + table th, table td { + border: 1px solid #ccc; + padding: 6px 13px; + } + + table tr { + border-top: 1px solid #ccc; + background-color: #fff; + } + + table tr:nth-child(2n) { + background-color: #f8f8f8; + } + + /* IMAGES + =============================================================================*/ + + img { + max-width: 100% + } diff --git "a/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\350\257\264\346\230\216.txt" 
"b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\350\257\264\346\230\216.txt" new file mode 100755 index 0000000..7257bda --- /dev/null +++ "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\350\257\264\346\230\216.txt" @@ -0,0 +1,2 @@ +stylesheet.cssΪCSS�ļ������ڿ�����ҳ��ʾ��ʽ��cover.jpgΪ��������棬���������滻���滻��֮�������cover.jpg���У���88x31.png�ǰ�Ȩ�����������ƶ����滻�����������ܻ��޷����С����� +��ɾ�Ļ�����һ�������������ݿ��ƹ�ȥ�ͺÿ�~ \ No newline at end of file diff --git "a/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/ico\346\240\274\345\274\217\357\274\214\345\274\200\345\217\221\347\224\250/\347\237\245\344\271\216\344\270\223\346\240\217ico.ico" "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/ico\346\240\274\345\274\217\357\274\214\345\274\200\345\217\221\347\224\250/\347\237\245\344\271\216\344\270\223\346\240\217ico.ico" new file mode 100755 index 0000000..cb75383 Binary files /dev/null and "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/ico\346\240\274\345\274\217\357\274\214\345\274\200\345\217\221\347\224\250/\347\237\245\344\271\216\344\270\223\346\240\217ico.ico" differ diff --git "a/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/ico\346\240\274\345\274\217\357\274\214\345\274\200\345\217\221\347\224\250/\347\237\245\344\271\216\344\270\273\347\253\231ico.ico" "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/ico\346\240\274\345\274\217\357\274\214\345\274\200\345\217\221\347\224\250/\347\237\245\344\271\216\344\270\273\347\253\231ico.ico" new file mode 100755 index 0000000..5a0357d Binary files /dev/null and 
"b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/ico\346\240\274\345\274\217\357\274\214\345\274\200\345\217\221\347\224\250/\347\237\245\344\271\216\344\270\273\347\253\231ico.ico" differ diff --git "a/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/ico\346\240\274\345\274\217\357\274\214\345\274\200\345\217\221\347\224\250/\347\237\245\344\271\216\345\221\250\345\210\212ico.ico" "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/ico\346\240\274\345\274\217\357\274\214\345\274\200\345\217\221\347\224\250/\347\237\245\344\271\216\345\221\250\345\210\212ico.ico" new file mode 100755 index 0000000..8aa7dca Binary files /dev/null and "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/ico\346\240\274\345\274\217\357\274\214\345\274\200\345\217\221\347\224\250/\347\237\245\344\271\216\345\221\250\345\210\212ico.ico" differ diff --git "a/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/ico\346\240\274\345\274\217\357\274\214\345\274\200\345\217\221\347\224\250/\347\237\245\344\271\216\346\227\245\346\212\245ico.ico" "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/ico\346\240\274\345\274\217\357\274\214\345\274\200\345\217\221\347\224\250/\347\237\245\344\271\216\346\227\245\346\212\245ico.ico" new file mode 100755 index 0000000..ca1e385 Binary files /dev/null and "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/ico\346\240\274\345\274\217\357\274\214\345\274\200\345\217\221\347\224\250/\347\237\245\344\271\216\346\227\245\346\212\245ico.ico" differ diff --git 
"a/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\344\270\223\346\240\217.jpg" "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\344\270\223\346\240\217.jpg" new file mode 100755 index 0000000..3e475c6 Binary files /dev/null and "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\344\270\223\346\240\217.jpg" differ diff --git "a/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\344\270\223\346\240\217\345\211\257\346\234\254.png" "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\344\270\223\346\240\217\345\211\257\346\234\254.png" new file mode 100755 index 0000000..52a39b2 Binary files /dev/null and "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\344\270\223\346\240\217\345\211\257\346\234\254.png" differ diff --git "a/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\344\270\273\347\253\231.jpg" "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\344\270\273\347\253\231.jpg" new file mode 100755 index 0000000..b8e4e0e Binary files /dev/null and "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\344\270\273\347\253\231.jpg" differ diff --git 
"a/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\345\221\250\345\210\212.jpg" "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\345\221\250\345\210\212.jpg" new file mode 100755 index 0000000..6668737 Binary files /dev/null and "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\345\221\250\345\210\212.jpg" differ diff --git "a/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\346\214\207\345\215\227.jpg" "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\346\214\207\345\215\227.jpg" new file mode 100755 index 0000000..a1f12d7 Binary files /dev/null and "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\346\214\207\345\215\227.jpg" differ diff --git "a/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\346\227\245\346\212\245.jpg" "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\346\227\245\346\212\245.jpg" new file mode 100755 index 0000000..19d9a2a Binary files /dev/null and "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\346\227\245\346\212\245.jpg" differ diff --git 
"a/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\346\227\245\346\212\245\345\211\257\346\234\254.png" "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\346\227\245\346\212\245\345\211\257\346\234\254.png" new file mode 100755 index 0000000..2c2069e Binary files /dev/null and "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\344\271\216\346\227\245\346\212\245\345\211\257\346\234\254.png" differ diff --git "a/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\350\257\206\351\235\222\345\271\264.jpg" "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\350\257\206\351\235\222\345\271\264.jpg" new file mode 100755 index 0000000..4622500 Binary files /dev/null and "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\237\245\350\257\206\351\235\222\345\271\264.jpg" differ diff --git "a/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\273\247\346\226\260\345\220\233\347\232\204\345\216\237\345\244\264\345\203\217.jpg" "b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\273\247\346\226\260\345\220\233\347\232\204\345\216\237\345\244\264\345\203\217.jpg" new file mode 100755 index 0000000..fb9b7f3 Binary files /dev/null and 
"b/\350\276\205\345\212\251\347\232\204ico\344\270\216\345\260\201\351\235\242\346\226\207\344\273\266/\347\224\265\345\255\220\344\271\246\345\210\266\344\275\234\350\265\204\346\272\220\346\226\207\344\273\266\345\244\271/\351\231\204\357\274\232\347\237\245\344\271\216\345\244\247\345\233\276/\347\273\247\346\226\260\345\220\233\347\232\204\345\216\237\345\244\264\345\203\217.jpg" differ