:param user_id: a string consisting of TENANT and USER names, used for
asserting the Owner ID (not required by S3Connection)
By default, the Connection class is initialized as the tester user, which
behaves as:
user_test_tester = testing .admin
"""
self.aws_access_key = aws_access_key
self.aws_secret_key = aws_secret_key
self.user_id = user_id
# NOTE: auth_host and auth_port can be different from storage location
self.host = tf.config['auth_host']
self.port = int(tf.config['auth_port'])
self.conn = \
S3Connection(aws_access_key, aws_secret_key, is_secure=False,
host=self.host, port=self.port,
calling_format=OrdinaryCallingFormat())
self.conn.auth_region_name = 'us-east-1'
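For context, a minimal usage sketch of a connection configured this way; the host, port, and tester credentials below are assumptions for a local S3-compatible test endpoint, not values taken from the snippet.

from boto.s3.connection import S3Connection, OrdinaryCallingFormat

# Assumed local test endpoint and tester credentials; adjust to your setup.
conn = S3Connection('test:tester', 'testing', is_secure=False,
                    host='127.0.0.1', port=8080,
                    calling_format=OrdinaryCallingFormat())
conn.auth_region_name = 'us-east-1'
# List the buckets owned by the tester account.
for bucket in conn.get_all_buckets():
    print(bucket.name)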
def download_pmc_zip_from_s3(self, doi_id, workflow):
"""
Simple download of PMC zip file from the live bucket
"""
bucket_name = self.pmc_zip_bucket
prefix = self.pmc_zip_folder
# Connect to S3 and bucket
s3_conn = S3Connection(self.settings.aws_access_key_id,
self.settings.aws_secret_access_key)
bucket = s3_conn.lookup(bucket_name)
s3_key_names = s3lib.get_s3_key_names_from_bucket(
bucket=bucket,
prefix=prefix)
s3_key_name = s3lib.latest_pmc_zip_revision(doi_id, s3_key_names)
if s3_key_name:
# Download
s3_key = bucket.get_key(s3_key_name)
filename = s3_key_name.split("/")[-1]
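The snippet ends right after computing the local filename; a hedged sketch of how the download itself could proceed with the standard boto key API (tmp_dir is an assumed working directory, not part of the original code).

import os

# Hypothetical continuation: s3_key and filename come from the code above.
tmp_dir = '/tmp'
local_path = os.path.join(tmp_dir, filename)
s3_key.get_contents_to_filename(local_path)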
def connect_s3(self):
"""
Connect to S3. Returns the boto connection object.
"""
return S3Connection()
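Called with no arguments, boto's S3Connection resolves credentials from the environment (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY) or the boto config file, so connect_s3() needs no explicit keys. A short usage sketch; the bucket name and prefix are made up.

from boto.s3.connection import S3Connection

# Assumes credentials are available via environment variables or ~/.boto.
conn = S3Connection()
bucket = conn.lookup('example-bucket')  # returns None if the bucket does not exist
if bucket is not None:
    for key in bucket.list(prefix='reports/'):
        print(key.name, key.size)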
def create_connections(self):
self.sqs_conn = SQSConnection(self.aws_access_key_id,
self.aws_secret_access_key)
if self.input_queue_name:
self.input_queue = self.get_queue(self.input_queue_name)
self.s3_conn = S3Connection(self.aws_access_key_id,
self.aws_secret_access_key)
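The get_queue helper referenced above is not shown; a plausible implementation on top of boto's SQSConnection, which exposes get_queue(name) and returns None when the queue does not exist, might look like this.

def get_queue(self, queue_name):
    # Hypothetical helper: look the queue up by name on the SQS connection
    # created in create_connections(); None means the queue does not exist.
    return self.sqs_conn.get_queue(queue_name)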
def mp_from_ids(s3server, mp_id, mp_keyname, mp_bucketname):
"""Get the multipart upload from the bucket and multipart IDs.
This allows us to reconstitute a connection to the upload
from within multiprocessing functions.
"""
if s3server['host']:
conn = boto.connect_s3(aws_access_key_id=s3server['access_key'],
aws_secret_access_key=s3server['secret_key'],
is_secure=s3server['is_secure'],
host=s3server['host'],
port=s3server['port'],
calling_format=boto.s3.connection.OrdinaryCallingFormat(),
path=s3server['conn_path'])
else:
conn = S3Connection(s3server['access_key'], s3server['secret_key'])
bucket = conn.lookup(mp_bucketname)
mp = boto.s3.multipart.MultiPartUpload(bucket)
mp.key_name = mp_keyname
mp.id = mp_id
return mp
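A hedged sketch of how a multiprocessing worker might use mp_from_ids to upload a single part; the part number and chunk path arguments are assumptions.

def upload_part(s3server, mp_id, mp_keyname, mp_bucketname, part_num, chunk_path):
    # Rebuild the multipart upload handle inside the worker process,
    # then upload one part from a local chunk file.
    mp = mp_from_ids(s3server, mp_id, mp_keyname, mp_bucketname)
    with open(chunk_path, 'rb') as fp:
        mp.upload_part_from_file(fp, part_num=part_num)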
"""
Sets up the queue, adds all files (text, warc, wat, or wet), and creates the bucket to store output
"""
#IAM
try:
setup_iam()
except Exception:
print("Error while setting up IAM profile, most likely due to an existing profile")
logging.exception("Error while setting up IAM profile, most likely due to an existing profile")
#S3 bucket
from boto.s3.connection import S3Connection
from cclib.commoncrawl import CommonCrawl
logging.getLogger('boto').setLevel(logging.CRITICAL)
import filequeue
S3 = S3Connection()
logging.info("Creating bucket "+OUTPUT_S3_BUCKET)
S3.create_bucket(OUTPUT_S3_BUCKET)
logging.info("bucket created")
# SQS
crawl = CommonCrawl(CRAWL_ID)
file_list = crawl.get_file_list(FILE_TYPE) # Text files
file_queue = filequeue.FileQueue(JOB_QUEUE,VISIBILITY_TIMEOUT,file_list)
logging.debug("Adding "+str(len(file_list))+" "+FILE_TYPE+" files to queue "+JOB_QUEUE)
file_queue.add_files()
logging.debug("Finished adding files")
print "Finished adding files"
def s3_bucket(bucket=None):
"""
Shortcut to a boto S3 bucket.
Uses settings.AWS_ACCESS_KEY and settings.AWS_SECRET_KEY;
defaults to settings.AWS_MEDIA_BUCKET.
"""
conn = S3Connection(settings.AWS_ACCESS_KEY, settings.AWS_SECRET_KEY)
if not bucket:
try:
bucket = settings.AWS_MEDIA_BUCKET
except AttributeError:
raise Exception("No bucket specified and no settings.AWS_MEDIA_BUCKET")
return conn.create_bucket(bucket)
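Typical call sites then grab the bucket and write keys directly; a short usage sketch in which the key name and payload are made up.

bucket = s3_bucket()  # falls back to settings.AWS_MEDIA_BUCKET
key = bucket.new_key('uploads/example.txt')
key.set_contents_from_string('hello world', policy='public-read')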
backup_tarfile.add(record_file.name, arcname=cls_name+'.json')
self.log("%s => %d records backed up", cls_name, rec_count)
# Finalize archive
backup_tarfile.close()
backup_file.flush()
backup_size = os.stat(backup_file.name).st_size
# Figure out key name for archived file
key_name = ('Backup_' + now_field() + '.tar.gz').replace(':', '_')
# upload archive to s3
if os.environ.get('DEBUG', False) or os.environ.get('travis', False):
# Local or CI - connect to our mock s3 service
conn = S3Connection(
'', '',
is_secure=False, port=8888, host='localhost',
calling_format=OrdinaryCallingFormat()
)
else:
conn = S3Connection(self.aws_access_key, self.aws_secret_key)
bucket = conn.get_bucket(self.bucketname)
key = Key(bucket)
key.key = key_name
self.log(
"Sending %s [size=%d bytes] with key name %s",
backup_file.name,
backup_size,
key_name
)
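The snippet stops at the log call; the upload that presumably follows would use the standard boto Key API, roughly as below.

# Hypothetical continuation: push the finished tarball to S3 under key_name.
key.set_contents_from_filename(backup_file.name)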
# File storage
if get_env("AWS_ACCESS_KEY_ID", ""):
AWS_ACCESS_KEY_ID = get_env("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = get_env("AWS_SECRET_ACCESS_KEY")
AWS_STORAGE_BUCKET_NAME = get_env("AWS_BUCKET_NAME")
AWS_AUTO_CREATE_BUCKET = True
AWS_QUERYSTRING_AUTH = False
AWS_EXPIRY = 60 * 60 * 24 * 7
AWS_S3_FILE_OVERWRITE = False
AWS_HEADERS = {"Cache-Control": "max-age={}".format(AWS_EXPIRY)}
AWS_S3_CALLING_FORMAT = OrdinaryCallingFormat()
# Retrieve S3 files over https, even when the bucket name contains a dot.
S3Connection.DefaultHost = "s3-eu-west-1.amazonaws.com"
DEFAULT_FILE_STORAGE = "storages.backends.s3boto.S3BotoStorage"
THUMBNAIL_DEFAULT_STORAGE = "storages.backends.s3boto.S3BotoStorage"
# Uploaded media
MEDIA_URL = "/media/"
MEDIA_ROOT = get_env("MEDIA_PATH")
# Static files, if in production use static root, else use static dirs
# Static URL to use when referring to static files located in STATIC_ROOT.
STATIC_URL = "/static/"
# The absolute path to the directory where collectstatic will collect static files for deployment.
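For reference, OrdinaryCallingFormat combined with a regional DefaultHost makes boto build path-style URLs, which avoids the TLS certificate mismatch that subdomain-style addressing causes for bucket names containing a dot. A hedged sketch; the bucket name and credentials are placeholders.

from boto.s3.connection import S3Connection, OrdinaryCallingFormat

conn = S3Connection('ACCESS_KEY', 'SECRET_KEY',
                    host='s3-eu-west-1.amazonaws.com',
                    calling_format=OrdinaryCallingFormat())
# Path-style URL, e.g. https://s3-eu-west-1.amazonaws.com/media.example.com/logo.png
url = conn.generate_url(3600, 'GET', bucket='media.example.com', key='logo.png')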
def upload_image(self, tmp_path, dest_filename, content_type):
bucket_name = self.config.get('aws', 'bucket_name')
access_key = self.config.get('aws', 'access_key')
secret_access_key = self.config.get('aws', 'secret_access_key')
url_prefix = self.config.get('aws', 'url_prefix')
dest_path = os.path.join(url_prefix, dest_filename)
url = 'http://s3.amazonaws.com/%s/%s' % (bucket_name, dest_path)
conn = S3Connection(access_key, secret_access_key)
bucket = conn.create_bucket(bucket_name)
key = Key(bucket)
key.key = dest_path
try:
key.set_contents_from_filename(tmp_path,
policy='public-read',
headers={'Content-Type': content_type,
'Max-Age': 604800 })
log.notice("Uploaded image {0} to {1}", tmp_path, url)
return url
except IOError as e:
log.warn("Failed to upload image {0} to {1} because {2}", tmp_path, url, str(e))
return None
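A hedged caller-side sketch, assuming uploader is an instance of the class that defines upload_image and that its config provides the [aws] options read above.

# Hypothetical call; the local path, destination name, and content type are examples.
url = uploader.upload_image('/tmp/chart.png', 'charts/chart-42.png', 'image/png')
if url is not None:
    print('image available at', url)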