# Blog post header: 22 September 2014, 3529 views
import urllib
import urllib2
from urllib import unquote
def download(url, opi, passName=None):
    """Download ``url`` to disk and print the name of the saved file.

    When ``passName`` is given, the resource is saved to exactly that path
    with ``urllib.urlretrieve``. Otherwise the URL is opened and the file
    name is chosen in this order:

    1. the ``filename=`` value of the ``Content-Disposition`` response
       header, with quotes removed, percent-decoded and decoded as UTF-8;
    2. the last path component of the final (possibly redirected) URL.

    The response body is then written to ``./files/<opi><fileName>``.

    This is Python 2 code: it relies on ``urllib.urlopen``,
    ``urllib.urlretrieve``, ``urlparse`` and ``str.decode``.

    Args:
        url: Address to fetch.
        opi: Value converted with ``str`` and prefixed to the saved name.
        passName: Optional explicit destination path.
    """
    if passName:
        fileName = passName
        # The original passed an undefined name (``attachURL``) here, which
        # raised NameError; the URL to fetch is the ``url`` argument.
        urllib.urlretrieve(url, fileName)
    else:
        from os.path import basename
        from urlparse import urlsplit

        r = urllib.urlopen(url)
        try:
            disposition = r.info().get('Content-Disposition', '')
            if 'filename=' in disposition:
                fileName = disposition.split('filename=')[1]
                fileName = fileName.replace('"', '').replace("'", "")
                fileName = unquote(fileName).decode('utf8')
            else:
                # No usable header: take the name from the final URL. This
                # also covers the non-redirected case, where the original
                # left ``fileName`` unbound.
                fileName = basename(urlsplit(r.url)[2])
            # The name comes from the remote server; drop any directory
            # components so it cannot escape ./files/.
            fileName = basename(fileName.replace('\\', '/'))
            with open('./files/' + str(opi) + fileName, 'wb') as f:
                f.write(r.read())
        finally:
            r.close()
    print("File: %s downloaded" % fileName.encode("GBK", 'ignore'))
'''
def download2(url,opi, passName=None):
if passName:
fileName = passName
urllib2.urlretrieve(attachURL, fileName)
else:
request = urllib2.Request(url)
request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko')
request.add_header('Referer', 'https://xxxxx')
request.add_header('Cookie', 'ASP.NET_SessionId=qfhwwnbglzwos5gjnvn4hq3p; ImageCheck=6DV7')
r = urllib2.urlopen(request)
#response.headers['Content-Type']
if r.headers().has_key('Content-Disposition'):
fileName = r.info()['Content-Disposition'].split('filename=')[1]
fileName = fileName.replace('"', '').replace("'", "")
fileName=unquote(fileName).decode('utf8')
print fileName
elif r.url != url:
# if we were redirected, the real file name we take from the final URL
from os.path import basename
from urlparse import urlsplit
fileName = basename(urlsplit(r.url)[2])
f = open(str(opi)+fileName, 'wb')
f.write(r.read())
f.close()
print "File:", fileName,"downloaded"
'''
# Batch driver: builds one URL per numeric ID and hands it to download().
# NOTE(review): op_url_base is an empty string in this file (presumably
# redacted); it has to hold the real URL prefix for the requests to work.
op_url = ""
op_url_base = ""
# The bare string below is a runtime no-op. It appears to record ID ranges
# and timestamps from earlier runs -- TODO confirm before relying on it.
'''
#763806 20170910-1751
for i in range(763549,769999):
201709111217-764103
760000,760698
760698,763549
760549-760690
750000,757966
'''
for i in range(776810, 776910):
    # Alternative single-ID range kept from the original:
    # for i in range(763549, 763550):
    op_url = op_url_base + str(i) + ""
    print(op_url)
    download(op_url, i)
# Blog text that was fused onto the call above, translated: "improve it a
# bit" -- it introduces the revised download2 defined next.
def download2(url, opi, passName=None):
    """Download ``url`` with browser-like headers and print timing info.

    A ``urllib2.Request`` is built with fixed ``User-Agent``, ``Referer``
    and ``Cookie`` headers and opened. The body is saved to ``passName``
    when that is given; otherwise to ``./files/<opi><fileName>``, where
    ``fileName`` is the ``filename=`` value of the ``Content-Disposition``
    header (quotes removed, percent-decoded, decoded as UTF-8) or, when the
    header is missing or has no ``filename=``, the last path component of
    the final URL. Finally the saved name and the elapsed seconds are
    printed.

    This is Python 2 code: it relies on ``urllib2``, ``urlparse`` and
    ``str.decode``.

    Args:
        url: Address to fetch.
        opi: Value converted with ``str`` and prefixed to the saved name.
        passName: Optional explicit destination path.
    """
    # ``time`` is not imported at file level, so bring it in locally.
    from os.path import basename
    from time import time
    from urlparse import urlsplit

    t = time()
    request = urllib2.Request(url)
    request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko')
    request.add_header('Referer', 'https://xxx/')
    # NOTE(review): hard-coded session cookie; it presumably expires and
    # should come from configuration -- confirm with the author.
    request.add_header('Cookie', 'ASP.NET_SessionId=qfhwwnbglzwos5gjnvn4hq3p; ImageCheck=6DV7')
    r = urllib2.urlopen(request)
    try:
        if passName:
            # The original called the non-existent ``urllib2.urlretrieve``
            # with an undefined ``attachURL``; reuse the prepared request so
            # the custom headers are sent in this case too.
            fileName = passName
            target = fileName
        else:
            disposition = r.headers.get('Content-Disposition', '')
            if 'filename=' in disposition:
                fileName = disposition.split('filename=')[1]
                fileName = fileName.replace('"', '').replace("'", "")
                fileName = unquote(fileName).decode('utf8')
            else:
                # The original raised KeyError when the header was absent.
                fileName = basename(urlsplit(r.url)[2])
            # The name comes from the remote server; drop any directory
            # components so it cannot escape ./files/.
            fileName = basename(fileName.replace('\\', '/'))
            target = './files/' + str(opi) + fileName
        with open(target, 'wb') as f:
            f.write(r.read())
    finally:
        r.close()
    print("File: %s downloaded" % fileName.encode("GBK", 'ignore'))
    print(time() - t)
# One-off run: fetch a single ID through download2().
op_url = "x"
op_url_base = "x"
i = 750046
op_url = "".join([op_url_base, str(i), ""])
print(op_url)
download2(op_url, i)
# Blog page footer: "No comments yet, be the first to comment."