Skip to content
Snippets Groups Projects
Commit 6ddd9580 authored by Odin Johan Vatne's avatar Odin Johan Vatne
Browse files

Add command for loading data from csv

Add minor test data
parent dd27aaf6
No related branches found
No related tags found
No related merge requests found
from datetime import datetime, timedelta, timezone, tzinfo
import logging
from django.core.exceptions import *
import csv
import os
from distutils.util import strtobool
from contextlib import contextmanager
from pasapp.models import ProjectTag, TagCategory, Tag, Project, Application
from pasapp.forms import NewUserForm
from django.contrib.auth.models import User
# Turns a textual CSV flag into a bool by delegating to distutils' strtobool.
to_bool = lambda str: bool(strtobool(str))
# Unit letters accepted by the relative-datetime shorthand ('n-1w2h25m'),
# mapped to the matching timedelta keyword argument.
short_map = {'w': 'weeks', 'd': 'days', 'h': 'hours', 'm': 'minutes'}
# Central European Summer Time is UTC+2; the previous +5 offset did not match
# the 'CEST' label and shifted every imported timestamp by three hours.
cest_tz = timezone(timedelta(hours=2), 'CEST')
def read_csv(input_folder):
    """Load a test dataset from the CSV files found in ``input_folder``.

    The folder is scanned for files whose names end in ``tagcategories.csv``,
    ``tags.csv``, ``users.csv``, ``projects.csv`` and ``applications.csv``.
    They are processed in that order so that the ``ref_id`` values declared by
    earlier files can be resolved as foreign keys by later ones. A missing
    file is logged as a warning and skipped.

    Header conventions understood by the loaders:
      * ``ref_id``  - file-local identifier, never stored on the model
      * ``b_<x>``   - boolean field ``<x>``
      * ``i_<x>``   - integer field ``<x>``
      * ``d_<x>``   - datetime field ``<x>`` (absolute or ``n-...`` relative)
      * ``f_<x>`` / ``ff_<x>`` - foreign key(s), resolved by each loader

    A summary of created / existing / failed rows is printed at the end.

    Args:
        input_folder: path of the directory containing the CSV files.

    Returns:
        None.
    """
    # ref_id -> primary key, filled in while the respective file is read.
    tag_category_map = {}
    tag_map = {}
    project_map = {}
    application_map = {}
    # Summary label -> accumulated count, printed once everything is loaded.
    output_stats = {}

    def stats_add(key, value):
        """Add ``value`` to the summary counter stored under ``key``."""
        output_stats[key] = output_stats.get(key, 0) + value

    def report(label, count):
        """Record ``count`` under ``label`` unless there is nothing to report."""
        if count > 0:
            stats_add(label, count)

    def get_or_throw(mapping, key):
        """Return ``mapping[key]`` or raise ObjectDoesNotExist naming the key."""
        if key in mapping:
            return mapping[key]
        # Carry the offending ref_id so the logged exception is not empty.
        raise ObjectDoesNotExist(f"Unknown ref_id '{key}'")

    def read_header(row, csv_name, required):
        """Build a column-name -> index map from the header ``row``.

        Returns None (after logging an error naming ``csv_name``) when the
        file is empty or one of the ``required`` columns is absent.
        """
        if row is None:
            return None
        col_map = {name: index for index, name in enumerate(row)}
        for key in required:
            if key not in col_map:
                logging.error(f"'{csv_name}' is missing necessary column '{key}'")
                return None
        return col_map

    def get_params(keys, row, skipped=()):
        """Convert one CSV ``row`` into keyword arguments for a model or form.

        ``keys`` is an iterable of (column name, column index) pairs. The
        ``ref_id`` column, foreign-key columns and anything in ``skipped`` are
        left out; blank cells are omitted so the model default applies.
        """
        output = {}
        for key, col in keys:
            if key == 'ref_id' or key in skipped:
                continue
            if key.startswith(('f_', 'ff_')):
                continue  # foreign keys, case-by-case handling
            value = row[col]
            if value == '':
                continue  # uses model default, use ' ' if a blank string is needed
            if key.startswith('b_'):  # boolean fields
                key = key[2:]
                value = to_bool(value)
            elif key.startswith('i_'):  # integer fields
                key = key[2:]
                value = int(value)
            elif key.startswith('d_'):  # datetime fields
                key = key[2:]
                if value.startswith('n-'):
                    # Shorthand for datetimes in the relative past of when the
                    # data was populated.
                    # (n)ow minus: (w)eeks, (d)ays, (h)ours, (m)inutes: n-1w2h25m
                    args = {}
                    digits = ''
                    for char in value[2:]:
                        if char in '0123456789':
                            digits += char
                        elif char in short_map:
                            args[short_map[char]] = int(digits)
                            digits = ''
                    value = datetime.now(tz=cest_tz) - timedelta(**args)
                else:
                    # 1998.11.21 14:46:55 -> 1998, 11, 21, 14, 46, 55
                    parts = value.replace('.', ':').replace(' ', ':').split(':')
                    value = datetime(*[int(part) for part in parts], tzinfo=cest_tz)
            output[key] = value
        return output

    def read_tag_categories(file):
        """Create TagCategory rows and remember their ref_id -> pk mapping."""
        # ref_id, category (name), (b_academic)
        created_cnt = 0
        failed_cnt = 0
        existing_cnt = 0
        error_cnt = 0

        def cnt_increment():
            nonlocal error_cnt
            error_cnt += 1

        # newline='' lets the csv module handle embedded line breaks itself.
        with open(file, newline='') as f:
            reader = csv.reader(f)
            error_logger = ErrorLogger('tagcategories', cnt_increment)
            col_map = read_header(next(reader, None), 'tagcategories.csv', ('ref_id',))
            if col_map is None:
                return
            for row_num, row in enumerate(reader, start=1):
                if not row:
                    continue  # blank line in the file
                with error_logger.handle(row_num):
                    ref_id = row[col_map['ref_id']]
                    params = get_params(col_map.items(), row)
                    retrieved, created = TagCategory.objects.get_or_create(**params)
                    if retrieved:
                        tag_category_map[ref_id] = retrieved.pk
                        if created:
                            created_cnt += 1
                        else:
                            existing_cnt += 1
                    else:
                        logging.warning(f"Could not create TagCategory {params.get('category')}, ref_id {ref_id}. Found at line {row_num} of 'tagcategories.csv'.")
                        failed_cnt += 1
        report('Tag Categories created', created_cnt)
        report('Tag Categories already existing', existing_cnt)
        # Rows that raised are failures too, even when nothing else failed.
        report('Tag Categories failed', failed_cnt + error_cnt)
        report('Errors occured while creating Tag Categories', error_cnt)

    def read_tags(file):
        """Create Tag rows, resolving ``f_category`` through tag_category_map."""
        # (ref_id), name, f_category
        created_cnt = 0
        failed_cnt = 0
        existing_cnt = 0
        error_cnt = 0

        def cnt_increment():
            nonlocal error_cnt
            error_cnt += 1

        with open(file, newline='') as f:
            reader = csv.reader(f)
            error_logger = ErrorLogger('tags', cnt_increment)
            col_map = read_header(next(reader, None), 'tags.csv', ('name', 'f_category'))
            if col_map is None:
                return
            for row_num, row in enumerate(reader, start=1):
                if not row:
                    continue
                with error_logger.handle(row_num):
                    ref_id = row[col_map['ref_id']] if 'ref_id' in col_map else False
                    name = row[col_map['name']]
                    category_id = get_or_throw(tag_category_map, row[col_map['f_category']])
                    retrieved, created = Tag.objects.get_or_create(name=name, category_id=category_id)
                    if retrieved:
                        if ref_id:
                            tag_map[ref_id] = retrieved.pk
                        if created:
                            created_cnt += 1
                        else:
                            existing_cnt += 1
                    else:
                        logging.warning(f"Could not create Tag {name}, ref_id {ref_id}. Found at line {row_num} of 'tags.csv'.")
                        failed_cnt += 1
        report('Tags created', created_cnt)
        report('Tags already existing', existing_cnt)
        report('Tags failed', failed_cnt + error_cnt)
        report('Errors occured while creating tags', error_cnt)

    def read_users(file):
        """Create users through NewUserForm; every user gets 'password'."""
        # username, email, (first_name, last_name, b_isProfessor), password -> 'password'
        created_cnt = 0
        failed_cnt = 0
        existing_cnt = 0
        with open(file, newline='') as f:
            reader = csv.reader(f)
            col_map = read_header(next(reader, None), 'users.csv', ('username', 'email'))
            if col_map is None:
                return
            for row_num, row in enumerate(reader, start=1):
                if not row:
                    continue
                form_args = get_params(col_map.items(), row, ('password',))
                form_args['password1'] = form_args['password2'] = 'password'
                # Explicit existence check instead of a bare except, so real
                # database errors are no longer mistaken for "user missing".
                username = form_args.get('username')
                if username is not None and User.objects.filter(username=username).exists():
                    existing_cnt += 1
                    continue
                form = NewUserForm(form_args)
                if form.is_valid():
                    form.save()
                    created_cnt += 1
                else:
                    logging.warning(f"Could not create User {row[col_map['username']]}, found at line {row_num} of 'users.csv':")
                    logging.warning(form.errors)
                    failed_cnt += 1
        report('Users created', created_cnt)
        report('Users already existing', existing_cnt)
        report('Users failed', failed_cnt)

    def read_projects(file):
        """Create Project rows and the ProjectTag links listed in ``ff_tags``."""
        # (ref_id), title, f_professor, ff_tags, (description, status, b_hidden, d_date_created, d_last_updated)
        created_cnt = 0
        failed_cnt = 0
        existing_cnt = 0
        error_cnt = 0
        created_tag_cnt = 0
        existing_tag_cnt = 0
        failed_tag_cnt = 0
        error_tag_cnt = 0

        def cnt_increment():
            nonlocal error_cnt
            error_cnt += 1

        def tag_cnt_increment():
            nonlocal error_tag_cnt
            error_tag_cnt += 1

        with open(file, newline='') as f:
            reader = csv.reader(f)
            error_logger = ErrorLogger('projects', cnt_increment)
            tag_error_logger = ErrorLogger('projectTags', tag_cnt_increment)
            col_map = read_header(next(reader, None), 'projects.csv', ('f_professor', 'title', 'status'))
            if col_map is None:
                return
            for row_num, row in enumerate(reader, start=1):
                if not row:
                    continue
                with error_logger.handle(row_num):
                    ref_id = row[col_map['ref_id']] if 'ref_id' in col_map else None
                    professor = User.objects.get(username=row[col_map['f_professor']])
                    params = get_params(col_map.items(), row)
                    # Projects have no unique fields, so a previous import is
                    # recognised by title + professor + status instead.
                    copies = Project.objects.filter(title=row[col_map['title']], professor=professor, status=row[col_map['status']])
                    if len(copies) > 0:
                        retrieved = copies[0]
                        created = False
                    else:
                        # will never get, only create, due to no unique fields
                        retrieved, created = Project.objects.get_or_create(professor=professor, **params)
                    if retrieved:
                        if ref_id:
                            project_map[ref_id] = retrieved.pk
                        if created:
                            created_cnt += 1
                        else:
                            existing_cnt += 1
                        if 'ff_tags' in col_map:
                            for tag_ref in row[col_map['ff_tags']].split(';'):
                                if not tag_ref:
                                    continue  # blank cell or stray ';' is not a tag
                                with tag_error_logger.handle(row_num):
                                    tag_id = get_or_throw(tag_map, tag_ref)
                                    project_tag, created_tag = ProjectTag.objects.get_or_create(project=retrieved, tag_id=tag_id)
                                    if not project_tag:
                                        failed_tag_cnt += 1
                                        continue
                                    if created_tag:
                                        created_tag_cnt += 1
                                    else:
                                        existing_tag_cnt += 1
                    else:
                        logging.warning(f"Could not create Project with ref_id {ref_id}. Found at line {row_num} of projects.csv.")
                        failed_cnt += 1
        report('Projects created', created_cnt)
        report('Projects already existing', existing_cnt)
        report('Projects failed', failed_cnt + error_cnt)
        report('Errors occured while creating Projects', error_cnt)
        report('ProjectTags created', created_tag_cnt)
        report('ProjectTags already existing', existing_tag_cnt)
        report('ProjectTags failed', failed_tag_cnt)
        report('Errors occured while creating ProjectTags', error_tag_cnt)

    def read_applications(file):
        """Create Application rows linking a student to an imported project."""
        # (ref_id), f_project, f_student, (i_priority, message, professor_status, student_status, d_date_created, d_last_updated)
        created_cnt = 0
        failed_cnt = 0
        existing_cnt = 0
        error_cnt = 0

        def cnt_increment():
            nonlocal error_cnt
            error_cnt += 1

        with open(file, newline='') as f:
            reader = csv.reader(f)
            error_logger = ErrorLogger('applications', cnt_increment)
            col_map = read_header(next(reader, None), 'applications.csv', ('f_project', 'f_student'))
            if col_map is None:
                return
            for row_num, row in enumerate(reader, start=1):
                if not row:
                    continue
                with error_logger.handle(row_num):
                    ref_id = row[col_map['ref_id']] if 'ref_id' in col_map else None
                    project_id = get_or_throw(project_map, row[col_map['f_project']])
                    student = User.objects.get(username=row[col_map['f_student']])
                    params = get_params(col_map.items(), row)
                    retrieved, created = Application.objects.get_or_create(project_id=project_id, student=student, **params)
                    if retrieved:
                        if ref_id:
                            application_map[ref_id] = retrieved.pk
                        if created:
                            created_cnt += 1
                        else:
                            existing_cnt += 1
                    else:
                        logging.warning(f"Could not create Application by {row[col_map['f_student']]} on project with refID '{row[col_map['f_project']]}'. Found at line {row_num} of applications.csv.")
                        failed_cnt += 1
        report('Applications created', created_cnt)
        report('Applications already existing', existing_cnt)
        report('Applications failed', failed_cnt + error_cnt)
        report('Errors occured while creating Applications', error_cnt)

    # Order matters: each loader relies on the ref_id maps filled by the
    # loaders listed before it.
    loaders = (
        ('tagcategories', read_tag_categories),
        ('tags', read_tags),
        ('users', read_users),
        ('projects', read_projects),
        ('applications', read_applications),
    )
    files = [os.path.join(input_folder, file) for file in os.listdir(input_folder)]
    validfiles = [file for file in files if os.path.isfile(file) and file.endswith(".csv")]
    for modeltype, loader in loaders:
        modelfile = next((file for file in validfiles if file.endswith(modeltype + ".csv")), None)
        if modelfile is None:
            logging.warning(f'No file found for {modeltype}.')
            continue
        loader(modelfile)
    for title, value in output_stats.items():
        print(f'{title}: {value}.')
    return
class ErrorLogger():
    """Logs and counts exceptions raised while importing rows of one CSV file.

    ``filename`` is the CSV name without extension (used in log messages) and
    ``counter_func`` is called once for every guarded block that did not
    complete normally.
    """

    def __init__(self, filename, counter_func):
        self.filename = f"'{filename}.csv'"
        self.counter_func = counter_func

    @contextmanager
    def handle(self, row_num, ref_id=None):
        """Guard the import of the row at ``row_num``.

        Exceptions deriving from Exception are logged and suppressed; in every
        case where the guarded block did not finish cleanly, ``counter_func``
        is invoked. ``ref_id`` only changes the wording of the log messages.
        """
        if ref_id is None:
            position = f"model at line {row_num} in {self.filename}"
        else:
            position = f"refID '{ref_id}' in a field of the model at line {row_num} in {self.filename}"
        # Assume failure until the guarded block has run to completion.
        failed = True
        try:
            yield
            failed = False
        except (FieldDoesNotExist, FieldError):
            logging.error(f"One or more of the fields specified in the header of {self.filename} were not defined in the model.")
        except ValidationError:
            logging.error(f"The {position} failed to validate. This is likely a missing foreign key issue.")
        except ObjectDoesNotExist as e:
            if ref_id is not None:
                logging.error(f"The refID '{ref_id}' at line {row_num} in {self.filename} was not found.")
            else:
                logging.error(f"A foreign key or refID at line {row_num} in {self.filename} was not found.")
                logging.error(e)
        except Exception as e:
            logging.error(f"The {position} could not be imported.")
            logging.error(e)
        finally:
            if failed:
                self.counter_func()
# with open(modelfile) as f:
# reader = csv.reader(f)
# col_map = {}
# for row_num, row in enumerate(reader):
# if row_num == 0:
# for i, val in enumerate(row):
# col_map[i] = val
# else:
# model_args = {col_map[i]: value for i, value in enumerate(row)}
# model_args = {col_map[i]: value for i, value in enumerate(row)}
# try:
# gotten, created = model.get_or_create(**model_args)
# if gotten:
# Logger.warn(f"Model at line {str(row_num)} already exists.")
# elif not created:
# Logger.warn(f"Could not create the model specified at line {str(row_num)}.")
# except FieldDoesNotExist:
# Logger.error("One or more of the fields specified in the csv file's header were not defined in the model.")
# except FieldError:
# Logger.error("One or more of the fields specified in the csv file's header were not defined in the model.")
# except ValidationError:
# Logger.error(f"The model at line {str(row_num)} failed to validate.")
# except:
# Logger.error(f"The model at line {str(row_num)} could not be imported.")
\ No newline at end of file
from django.core.management.base import BaseCommand
from django.apps import apps
from ._readcsv import read_csv
import os
def dir_path(string):
    """Return ``string`` unchanged when it names an existing directory.

    Used as the argparse ``type`` for the command's folder argument.

    Raises:
        NotADirectoryError: if ``string`` is not an existing directory.
    """
    if not os.path.isdir(string):
        raise NotADirectoryError(string)
    return string
class Command(BaseCommand):
    """Management command that feeds a folder of CSV files to ``read_csv``."""

    help = 'Loads a test dataset into the database from a folder.'

    def add_arguments(self, parser):
        """Register the single positional ``folder_path`` argument."""
        # nargs=1 makes argparse store the validated path in a one-item list.
        parser.add_argument('folder_path', nargs=1, type=dir_path)

    def handle(self, *args, **options):
        """Run the import on the normalised folder path, if one was given."""
        folder = options['folder_path'][0]
        if not folder:
            return
        read_csv(os.path.normpath(folder))
ref_id,f_project,f_student,i_priority,professor_status,student_status,d_date_created,d_last_updated,message
,tree_surv,emile,,,,,,"I think trees are really neat"
\ No newline at end of file
ref_id,title,f_professor,status,b_hidden,d_date_created,d_last_updated,ff_tags,description
tree_surv,Noninvasive devices for long-term pine tree surveys,frankf,Open,no,n-1d,,size_1;size_g;comm_g,"The project will involve creating a prototype sensor device for long term survey of norwegian pine trees, in collaboration with the Norwegian Forestry and Wildlife Department",
\ No newline at end of file
ref_id,category,b_academic
priv,Privacy Level,true
size,Group Size,false
comm,Customer Type,false
\ No newline at end of file
ref_id,name,f_category
priv_sens,Sensitive,priv
priv_conf,Confidential,priv
size_1,Solo,size
size_g,Group,size
size_d,Duo,size
comm_u,University Project,comm
comm_g,Government Project,comm
comm_p,Third Party Project,comm
\ No newline at end of file
username,email,first_name,last_name,b_isProfessor
emile,emile@stud.ntnu.no,Emil,Ebert,no
frankf,frankf@ntnu.no,Frank,Farenheit,yes
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment