# -*- coding: utf-8 -*- import httplib, urlparse, urllib, os import shutil import tempfile import random WU_SAYLOUD = True # deprecated - not used class WUContentLengthError(Exception):pass class WUURLError(Exception):pass class WURedirectLimitError(Exception):pass def createtempname(filename=''): return os.path.join( tempfile.gettempdir(), ''.join([ 'tmp_webutils_', ''.join(random.sample('qwertyuiopasdfghjklzxcvbnm11234567890',10)), filename ]) ) def getContentType(url, allowed = []): """ mixed getContentType(str url [, list allowed]) Returns Content-Type of url. If list of allowed is not empty returns True if mime is in list or False otherwise. """ p = urlparse.urlparse(url) h = httplib.HTTPConnection(p[1]) h.request('HEAD',p[2]) resp = h.getresponse() ct = resp.getheader('Content-Type').split(';')[0] if len(allowed) > 0: if ct in allowed: return True return False return ct ### /getContentType def getFile(url, saveAs, minSize=0, data=None): """ getFile(str URL, str saveAs[, int minSize]) -> boolean downloadx URL given and saves it as saveAs. minimumFileSize is optional. You can set a minimum file size if server doesnt respond with standard 404 (not found) code but with 200 (ok) code and some kind of message like "image not found" or something like that. """ p = urlparse.urlparse(url) h = httplib.HTTPConnection(p[1]) h.request('HEAD',p[2]) resp = h.getresponse() if resp.status == 200: if minSize > 0: if int(resp.getheader('content-length')) < int(minSize): raise WUContentLengthError('Content-length header\'s value ' \ 'is %s (%s required)' % (resp.getheader('content-length'), minSize)) try: tempfile = createtempname(os.path.splitext(saveAs)[1]) opener = urllib.FancyURLopener() if data == None: opener.retrieve(url, tempfile) else: if type(data) is not str: data = urllib.urlencode(data) opener.retrieve(url, tempfile, data=data) except IOError: os.remove(tempfile) print 'can\'t get %s' % url return False except: os.remove(tempfile) raise if os.path.getsize(tempfile) < minSize: os.remove(tempfile) raise WUContentLengthError('Downloaded file\'s size is ' \ 'is %s (%s required)' % (os.path.getsize(tempfile), minSize)) else: shutil.copy(tempfile, saveAs) os.remove(tempfile) return True ### /getFile def resourceExists(url, allowRedirects=False, limit=10): """ Checks if resource exists """ while limit > 0: p = urlparse.urlparse(url) if p[2] == '': getPath = '/' else: getPath = p[2] if p[4] != '': # GET query getPath += '?%s' % p[4] h = httplib.HTTPConnection(p[1]) h.request('HEAD',getPath) resp = h.getresponse() if resp.status == 200: return True elif allowRedirects: if resp.getheader('location') != None: url = resp.getheader('Location') limit -= 1 else: return False else: return False raise WURedirectLimitError('Redirect Limit (%s) exceeded.' % limit) ### /resourceExists def getStatus(url): p = urlparse.urlparse(url) if p[2] == '': getPath = '/' else: getPath = p[2] if p[4] != '': # GET query getPath += '?%s' % p[4] h = httplib.HTTPConnection(p[1]) h.request('HEAD',getPath) resp = h.getresponse() return resp.status ### /getStatus def getTrueUrl(url, limit=10): """ Gets true url (after all redirects) """ while limit > 0: p = urlparse.urlparse(url) if p[2] == '': getPath = '/' else: getPath = p[2] if p[4] != '': # GET query getPath += '?%s' % p[4] h = httplib.HTTPConnection(p[1]) h.request('HEAD',getPath) resp = h.getresponse() if resp.status == 200: return url else: if resp.getheader('location') != None: url = resp.getheader('Location') limit -= 1 else: raise WUURLError('%s status is <%s>' % (url, resp.status)) raise WURedirectLimitError('Redirect Limit (%s) exceeded.' % limit) ### /resourceExists class FileDownloader: """ FileDownloader fd = FileDownloader() fd.download(url, saveAs, minimumSize = 0) -> boolean Methods that you can override: FileDownloader.fileIsTooSmall(url, fileName) -> boolean Is called if downloaded file is smaller than minimumSize. Should return False. FileDownloader.downloadCompleted(url, fileName) -> boolean Is called when download is complete and file size is bigger than minimumSize. Should return True. FileDownloader.onFinish(validResponseCode, validSize, validCompleted) -> boolean Should return False if any of passed arguments is false or True otherwise. """ def __init__(self): pass def download(self, url, saveAs, minSize = 0): self.validSize = True self.validCompleted = False p = urlparse.urlparse(url) h = httplib.HTTPConnection(p[1]) h.request('HEAD',p[2]) r = h.getresponse() self.responseCode = r.status h.close() self.__getFile(url, saveAs, minSize) return self.onFinish() def __getFile(self, url, saveAs, minSize): try: tempfile = createtempname(os.path.splitext(saveAs)[1]) opener = urllib.FancyURLopener() res = opener.retrieve(url, tempfile) except IOError: print 'cant get %s' % url return False if os.path.getsize(tempfile) < minSize: os.remove(tempfile) self.validSize = self.fileIsTooSmall(url, saveAs) return False else: shutil.copy(tempfile, saveAs) os.remove(tempfile) self.validCompleted = self.downloadCompleted(url, saveAs) return True # hooks def fileIsTooSmall(self, url, fileName): print 'TOO SMALL!' return False def downloadCompleted(self, url, fileName): print 'DOWNLOADED!' return True def onFinish(self): print 'Exit' if not self.validSize or not self.validCompleted: return False else: return True ### /FileDownloader