Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def getPage (self, url, requestHeader = []) :
resultFormate = StringIO.StringIO()
fakeIp = self.fakeIp()
requestHeader.append('CLIENT-IP:' + fakeIp)
requestHeader.append('X-FORWARDED-FOR:' + fakeIp)
try:
curl = pycurl.Curl()
curl.setopt(pycurl.URL, url.strip())
curl.setopt(pycurl.ENCODING, 'gzip,deflate')
curl.setopt(pycurl.HEADER, 1)
curl.setopt(pycurl.TIMEOUT, 120)
curl.setopt(pycurl.SSL_VERIFYPEER, 0)
curl.setopt(pycurl.SSL_VERIFYHOST, 0)
curl.setopt(pycurl.HTTPHEADER, requestHeader)
curl.setopt(pycurl.WRITEFUNCTION, resultFormate.write)
curl.perform()
headerSize = curl.getinfo(pycurl.HEADER_SIZE)
curl.close()
header = resultFormate.getvalue()[0 : headerSize].split('\r\n')
body = resultFormate.getvalue()[headerSize : ]
except Exception, e:
header = ''
body = ''
return header, body
# Set the output file
# NOTE(review): fragment — self.curl_obj, self.output_file and
# self.delete_temp are defined outside this chunk. The file handle opened
# here is handed to libcurl via WRITEDATA and is never explicitly closed
# before sys.exit(); the interpreter shutdown closes it implicitly.
self.curl_obj.fp = open(self.output_file, "wb")
self.curl_obj.setopt(pycurl.WRITEDATA, self.curl_obj.fp)
self.curl_obj.perform()
self.delete_temp()
sys.exit()
# All errors are logged in the logfile
log=open("/var/log/downloader.log","a") # Opened in append mode (maintains all logs)
num_proc=10 # This variable decides the number of concurrent processes.
pid=[] # This stores the list of PIDs of the children (processes)
# NOTE(review): fragment — self.curl_obj, self.size, self.chunk and
# self.dir_name are defined outside this chunk.
self.curl_obj.setopt(pycurl.TIMEOUT,60*10) # Limits the maximum download time per download-connection to 10 minutes (This could be changed for slower connections)
lim_l=0 # lower limit of the byte-range for download
lim_u=self.chunk # upper limit of the byte-range for download
i=1 # 1-based index used to name the per-chunk temporary files
# Walk the target in [lim_l, lim_u) byte windows until self.size is covered.
while lim_l<=self.size :
# Create a temporary filename
temp_output=os.path.join(self.dir_name,"output"+str(i))
#print temp_output
# Ensure that it doesn't already exists
# If it exists and its size is the same as that of the chunk downloaded each time, then go to the next chunk
if os.path.exists(temp_output) and os.path.getsize(temp_output)==self.chunk:
#print str(i)+" MB already downloaded"
i=i+1
# HTTP Range value for this chunk, e.g. "0-1048575" (inclusive upper bound,
# hence the -1). Loop body continues beyond this chunk.
r=str(lim_l)+"-"+str(lim_u-1)
def curl_setup(self, proxy=None, timeout=0):
    """Build a configured pycurl easy handle and store it on self.cobj.

    *proxy* is an optional proxy URL; *timeout* of 0 means wait forever,
    any other value is applied as both connect and total timeout.
    """
    handle = pycurl.Curl()
    # Collect the (option, value) pairs first, then apply them in one pass.
    options = [
        (pycurl.USERAGENT, user_agent()),
        (pycurl.FOLLOWLOCATION, True),
        (pycurl.CAINFO, certifi.where()),
    ]
    if proxy:
        options.append((pycurl.PROXY, proxy))
    if timeout:  # 0 = wait forever
        options.append((pycurl.CONNECTTIMEOUT, timeout))
        options.append((pycurl.TIMEOUT, timeout))
    if self.verbose and not self.silent:
        options.append((pycurl.VERBOSE, True))
    for option, value in options:
        handle.setopt(option, value)
    self.cobj = handle
# NOTE(review): fragment — looks like the tail of a CurlMulti/pyev pool
# initializer; self.multi, poolSize and the callback methods referenced
# below (self.socket, self.rescheduleTimer, self.signal, self.timer,
# self.checkTimeouts) are defined outside this chunk.
self.multi.handles = []
self.multi.setopt(pycurl.M_SOCKETFUNCTION, self.socket)
self.multi.setopt(pycurl.M_TIMERFUNCTION, self.rescheduleTimer)
# One shared DNS cache across every easy handle in the pool.
self.share = pycurl.CurlShare()
self.share.setopt(pycurl.SH_SHARE, pycurl.LOCK_DATA_DNS)
self.poolSize = poolSize
self.loop = pyev.default_loop()
self.queue = []  # pending work items
self.numInFlight = 0  # count of handles currently attached to the multi
# Pre-allocate the fixed pool of easy handles.
for i in range(self.poolSize):
c = pycurl.Curl()
c.timeout = None  # per-handle deadline, managed by checkTimeouts below
c.setopt(pycurl.FOLLOWLOCATION, 1)
c.setopt(pycurl.MAXREDIRS, 5)
c.setopt(pycurl.CONNECTTIMEOUT, 15)
c.setopt(pycurl.TIMEOUT, 300)
c.setopt(pycurl.NOSIGNAL, 1)
# Force a new connection per request and forbid keep-alive reuse.
c.setopt(pycurl.FRESH_CONNECT, 1)
c.setopt(pycurl.FORBID_REUSE, 1)
c.setopt(pycurl.SHARE, self.share)
self.multi.handles.append(c)
self.pool = self.multi.handles[:]  # free-handle list (shallow copy)
# Route SIGPIPE/SIGINT/SIGTERM through the event loop for clean shutdown.
SIGSTOP = (signal.SIGPIPE, signal.SIGINT, signal.SIGTERM)
self.watchers = [pyev.Signal(sig, self.loop, self.signal) for sig in SIGSTOP]
self.evTimer = pyev.Timer(0, 1.0, self.loop, self.timer)
self.timeoutTimer = pyev.Timer(60.0, 60.0, self.loop, self.checkTimeouts)
#self.watchers.append(pyev.Idle(self.loop, self.idle))
# NOTE(review): fragment — curl, params, post, url, username, password
# and header come from outside this chunk.
curl.setopt(pycurl.POST, 1)
if params:
if post:
# POST: send the parameters form-encoded in the request body.
curl.setopt(pycurl.POSTFIELDS, urllib.urlencode(params))
else:
# GET-style: append the parameters to the query string instead.
url = "?".join((url, urllib.urlencode(params)))
curl.setopt(pycurl.URL, str(url))
if username and password:
# HTTP basic-auth credentials, sent as "user:pass".
curl.setopt(pycurl.USERPWD, "%s:%s" % (str(username), str(password)))
curl.setopt(pycurl.FOLLOWLOCATION, 1)
curl.setopt(pycurl.MAXREDIRS, 5)
curl.setopt(pycurl.TIMEOUT, 15)
curl.setopt(pycurl.CONNECTTIMEOUT, 8)
# Pin the request to HTTP/1.0.
curl.setopt(pycurl.HTTP_VERSION, pycurl.CURL_HTTP_VERSION_1_0)
# Separate buffers for the response body and the response headers.
content = StringIO.StringIO()
hdr = StringIO.StringIO()
curl.setopt(pycurl.WRITEFUNCTION, content.write)
curl.setopt(pycurl.HEADERFUNCTION, hdr.write)
# Python 2 print statement — debug output.
print curl, url, header
try:
curl.perform()
except pycurl.error, e:
# NOTE(review): `raise e` resets the traceback; a bare `raise` would
# preserve it.
raise e
http_code = curl.getinfo(pycurl.HTTP_CODE)
# Non-200 handling lies beyond this chunk boundary.
if http_code != 200:
# NOTE(review): fragment — crl, rel_proxy_port, cookie_file, header and
# url come from outside this chunk, as does CurlException.
crl.setopt(pycurl.PROXYPORT, rel_proxy_port)
# set cookie
rel_cookie_file = cookie_file or self.cookie_file
if rel_cookie_file:
# The same file both supplies (COOKIEFILE) and persists (COOKIEJAR) cookies.
crl.setopt(pycurl.COOKIEFILE, rel_cookie_file)
crl.setopt(pycurl.COOKIEJAR, rel_cookie_file)
# set ssl
# SECURITY NOTE(review): certificate verification is disabled and
# SSLVERSION 3 forces SSLv3, which is broken (POODLE) — confirm intended.
crl.setopt(pycurl.SSL_VERIFYPEER, 0)
crl.setopt(pycurl.SSL_VERIFYHOST, 0)
crl.setopt(pycurl.SSLVERSION, 3)
crl.setopt(pycurl.CONNECTTIMEOUT, 10)
crl.setopt(pycurl.TIMEOUT, 300)
# Tunnel through the proxy with CONNECT rather than plain proxying.
crl.setopt(pycurl.HTTPPROXYTUNNEL,1)
rel_header = header or self.header
if rel_header:
crl.setopt(pycurl.HTTPHEADER, rel_header)
# Response body accumulates in an in-memory buffer hung off the handle.
crl.fp = StringIO.StringIO()
if isinstance(url, unicode):
url = str(url)  # pycurl requires a byte-string URL (Python 2)
crl.setopt(pycurl.URL, url)
crl.setopt(crl.WRITEFUNCTION, crl.fp.write)
try:
crl.perform()
except Exception, e:
# Wrap any transport failure in the project's CurlException.
raise CurlException(e)
# NOTE(review): fragment — path, newpath, token, key, c and progress come
# from outside this chunk. Flow: rename the file aside, upload it to the
# qiniu endpoint, rename it back when the upload is confirmed.
os.rename(path, newpath)
print("rename" + newpath)
bak_path = newpath
print(path)
# Multipart form fields for the qiniu upload API; the filename is encoded
# as GBK — presumably for Chinese filenames, verify against the server.
fields = [('file', (c.FORM_FILE, newpath.encode('gbk'))),
('token', token),
('key', key),
('x:md5', key)]
c.setopt(c.VERBOSE, 1)
c.setopt(c.URL, "http://upload.qiniu.com/")
c.setopt(c.HTTPPOST, fields)
c.setopt(c.NOPROGRESS, 0)  # enable the progress callback below
c.setopt(c.PROGRESSFUNCTION, progress)
c.setopt(pycurl.CONNECTTIMEOUT, 60)
c.setopt(pycurl.TIMEOUT, 600)
try:
# NOTE(review): Curl.perform() returns None, so `info` is always None.
info = c.perform()
print(info)
print(fields)
if c.getinfo(c.HTTP_CODE) == 200:
# Upload succeeded: restore the original filename.
os.rename(newpath, path)
print("rename" + path)
return True
except pycurl.error as e:
print(e)
sys.stdout.write("File no Found!")
return False
# Fell through without returning (non-200 response): undo the rename.
if os.path.exists(newpath):
os.rename(newpath, path)
print("rename" + path)
# NOTE(review): fragment — curl, req, url, method, post_data, headers,
# the http module and _PendingRequest come from outside this chunk
# (ganeti-style RPC client).
resp_buffer = StringIO()
# Configure client for request
curl.setopt(pycurl.VERBOSE, False)
curl.setopt(pycurl.NOSIGNAL, True)
curl.setopt(pycurl.USERAGENT, http.HTTP_GANETI_VERSION)
curl.setopt(pycurl.PROXY, "")  # explicitly disable any environment proxy
curl.setopt(pycurl.CUSTOMREQUEST, str(method))
curl.setopt(pycurl.URL, url)
curl.setopt(pycurl.POSTFIELDS, post_data)
curl.setopt(pycurl.HTTPHEADER, headers)
if req.read_timeout is None:
curl.setopt(pycurl.TIMEOUT, 0)  # 0 = no total-time limit
else:
curl.setopt(pycurl.TIMEOUT, int(req.read_timeout))
# Disable SSL session ID caching (pycurl >= 7.16.0)
if hasattr(pycurl, "SSL_SESSIONID_CACHE"):
curl.setopt(pycurl.SSL_SESSIONID_CACHE, False)
curl.setopt(pycurl.WRITEFUNCTION, resp_buffer.write)
# Pass cURL object to external config function
if req.curl_config_fn:
req.curl_config_fn(curl)
# getvalue is passed unbound so the caller reads the body only when done.
return _PendingRequest(curl, req, resp_buffer.getvalue)
def __init__(self, url, result_callback):
    """Wrap *url* in a pre-configured pycurl handle.

    *result_callback* is stored for whoever drains the multi stack to
    invoke with the fetch result; the handle carries a back-reference to
    this object via its ``ctx`` attribute.
    """
    self.url = url
    self.result_callback = result_callback
    handle = pycurl.Curl()
    for option, value in (
        (pycurl.FOLLOWLOCATION, 1),
        (pycurl.MAXREDIRS, 5),
        (pycurl.CONNECTTIMEOUT, 30),
        (pycurl.TIMEOUT, 300),
        (pycurl.NOSIGNAL, 1),
        (pycurl.URL, str(url)),
    ):
        handle.setopt(option, value)
    handle.ctx = self  # back-reference so multi-loop callbacks find us
    self.curl_ctx = handle
# NOTE(review): fragment — num_conn, num_urls and queue come from outside
# this chunk. Classic CurlMulti fan-out: a fixed pool of easy handles
# services a queue of (url, filename) downloads; the main loop continues
# past the end of this chunk.
num_conn = min(num_conn, num_urls)  # never more connections than URLs
assert 1 <= num_conn <= 10000, "invalid number of concurrent connections"
print("PycURL %s (compiled against 0x%x)" % (pycurl.version, pycurl.COMPILE_LIBCURL_VERSION_NUM))
print("----- Getting", num_urls, "URLs using", num_conn, "connections -----")
# Pre-allocate a list of curl objects
m = pycurl.CurlMulti()
m.handles = []
for i in range(num_conn):
c = pycurl.Curl()
c.fp = None  # output file handle, attached when a URL is assigned
c.setopt(pycurl.FOLLOWLOCATION, 1)
c.setopt(pycurl.MAXREDIRS, 5)
c.setopt(pycurl.CONNECTTIMEOUT, 30)
c.setopt(pycurl.TIMEOUT, 300)
c.setopt(pycurl.NOSIGNAL, 1)
m.handles.append(c)
# Main loop
freelist = m.handles[:]  # handles not currently attached to the multi
num_processed = 0
while num_processed < num_urls:
# If there is an url to process and a free curl object, add to multi stack
while queue and freelist:
url, filename = queue.pop(0)
c = freelist.pop()
c.fp = open(filename, "wb")
c.setopt(pycurl.URL, url)
c.setopt(pycurl.WRITEDATA, c.fp)
m.add_handle(c)