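# These are Python 2 excerpts from a course-analytics pipeline that moves edX
# course data between Google Cloud Storage and BigQuery.  gsutil and bqutil
# are helper modules from the surrounding project (wrappers around the cloud
# APIs), and free variables such as course_id_set, outdir, and nskip come
# from the enclosing context.

import json
import os
import sys

# --- First excerpt: download each course's person_course CSV from Google
# Storage, skipping local copies that are already up to date. ---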
ofnset = []
cnt = 0
for course_id in course_id_set:
    gb = gsutil.gs_path_from_course_id(course_id, use_dataset_latest=use_dataset_latest)
    ofn = outdir / ('person_course_%s.csv.gz' % (course_id.replace('/', '__')))
    ofnset.append(ofn)
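    # When nskip is set, accept any existing local copy without checking
    # whether the copy in Google Storage is newer.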
    if (nskip > 0) and ofn.exists():
        print "%s already exists, not downloading" % ofn
        sys.stdout.flush()
        continue
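    # Otherwise compare the local file's mtime (in UTC) against the file
    # listing from Google Storage, and re-download only when the remote
    # copy is newer.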
    if ofn.exists():
        fnset = gsutil.get_gs_file_list(gb)
        local_dt = gsutil.get_local_file_mtime_in_utc(ofn)
        fnb = 'person_course.csv.gz'
        if fnb not in fnset:
            print "%s/%s missing! skipping %s" % (gb, fnb, course_id)
            continue
        if local_dt >= fnset[fnb]['date']:
            print "%s already exists with date %s (gs file date %s), not re-downloading" % (ofn, local_dt, fnset[fnb]['date'])
            sys.stdout.flush()
            continue
        else:
            print "%s already exists but has date %s (gs file date %s), so re-downloading" % (ofn, local_dt, fnset[fnb]['date'])
            sys.stdout.flush()
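    # Fall through to (re)download via the gsutil command-line tool.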
    cmd = 'gsutil cp %s/person_course.csv.gz %s' % (gb, ofn)
    print "Retrieving %s via %s" % (course_id, cmd)
    sys.stdout.flush()
    os.system(cmd)
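

# --- Second excerpt: build the problem_analysis table for one course. ---
# schema2dict() and openfile() are helpers from the surrounding project; the
# sketches below are plausible stand-ins inferred from how they are called,
# not the project's actual implementations.
import gzip

def schema2dict(schema):
    # Assumed behavior: index a BigQuery schema (a list of field dicts) by
    # field name, for quick per-field lookup.
    return dict((field['name'], field) for field in schema)

def openfile(fn):
    # Assumed behavior: open fn, falling back to a gzipped copy, and return
    # None when neither exists.
    fn = str(fn)
    if os.path.exists(fn):
        return open(fn)
    if os.path.exists(fn + '.gz'):
        return gzip.open(fn + '.gz')
    return None

# The enclosing function header is reconstructed (the excerpt contains a bare
# `return`, so it must sit inside a function); the parameter list is an
# assumption based on the names the body uses.
def analyze_problems(course_id, mypath, lfp, force_recompute=False, use_dataset_latest=False):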
    SCHEMA_FILE = '%s/schemas/schema_problem_analysis.json' % mypath
    the_schema = json.loads(open(SCHEMA_FILE).read())['problem_analysis']
    the_dict_schema = schema2dict(the_schema)

    smfn = lfp / 'studentmodule.csv'
    smfp = openfile(smfn)
    if smfp is None:
        print "--> [analyze_problems] oops, missing %s, cannot process course %s" % (smfn, course_id)
        return

    print "[analyze_problems] processing %s for course %s to create problem_analysis table" % (smfn, course_id)
    sys.stdout.flush()
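
    # openfile() may have fallen back to a gzipped copy of studentmodule.csv;
    # record the actual filename so the mtime is read from the right file.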
    if smfp.name.endswith('.gz'):
        smfn += '.gz'
    sm_moddate = gsutil.get_local_file_mtime_in_utc(smfn, make_tz_unaware=True)
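
    # The BigQuery dataset name is derived from the course id (the "latest"
    # variant of the dataset when use_dataset_latest is set).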
    dataset = bqutil.course_id2dataset(course_id, use_dataset_latest=use_dataset_latest)
    table = 'problem_analysis'
    # If the table already exists, check whether it is older than the local
    # studentmodule file before recomputing it.
    if not force_recompute:
        try:
            table_moddate = bqutil.get_bq_table_last_modified_datetime(dataset, table)
        except Exception as err:
            if "Not Found" in str(err):
                table_moddate = None
            else:
                raise
        if table_moddate is not None:
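            # (The excerpt breaks off here: per the comment above, the full
            # code goes on to compare table_moddate with sm_moddate and skips
            # the recomputation when the BigQuery table is newer.)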
            pass
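

# --- Third excerpt: export BigQuery tables (data or schema) to local files. ---
# This fragment is a loop body (note the `continue` statements), so a loop
# header is needed; the `for` line and the initial `ofn` assignment below are
# assumptions, with the output name guessed by analogy with the schema
# filename used further down.  `args`, `tables`, `dataset`, `optargs`,
# `out_fmt`, and `course_id_by_table` come from the surrounding driver code.
for tablename in tables:
    ofn = '%s__%s.%s' % (dataset, tablename, out_fmt)  # assumed naming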
print "Retrieving %s as %s" % (table, ofn)
    if args.add_courseid and course_id_by_table:
        optargs['extra_fields'] = {'course_id': course_id_by_table[tablename]}
        print "--> Adding %s for %s to each row" % (course_id_by_table[tablename], 'course_id')
        sys.stdout.flush()
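    # When just_get_schema is set, save only the table schema as JSON and
    # skip the data download.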
    if args.just_get_schema:
        tinfo = bqutil.get_bq_table_info(dataset, tablename, **optargs)
        ofn = '%s__%s__schema.json' % (dataset, tablename)
        print "Saving schema file as %s" % ofn
        open(ofn, 'w').write(json.dumps(tinfo['schema']['fields'], indent=4))
        continue
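    # When only_if_newer is set, skip tables whose BigQuery contents are no
    # newer than the existing local file.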
    if args.only_if_newer and os.path.exists(ofn):
        mod_dt = bqutil.get_bq_table_last_modified_datetime(dataset, tablename)
        of_dt = gsutil.get_local_file_mtime_in_utc(ofn, make_tz_unaware=True)
        if mod_dt < of_dt:
            print "--> only_if_newer specified, and table %s mt=%s, file mt=%s, so skipping" % (tablename, mod_dt, of_dt)
            continue
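    # Fetch the table contents, tolerating missing tables when skip_missing
    # is set.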
    try:
        bqdat = bqutil.get_table_data(dataset, tablename,
                                      convert_timestamps=True,
                                      return_csv=(out_fmt == 'csv'), **optargs)
    except Exception as err:
        if args.skip_missing and 'HttpError 404' in str(err):
            print "--> missing table [%s.%s] Skipping..." % (dataset, tablename)
            sys.stdout.flush()
            continue
        raise