import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from datetime import timedelta
# Load the exported time-log CSV files into one DataFrame per log,
# keyed by the year (or 'ba') the log is stored under.
_timelog_files = {
    2019: 'data/University_Timelogs_2019-01-01_to_2020-01-01.csv',
    2020: 'data/University_Timelogs_2020-01-01_to_2020-12-31.csv',
    2021: 'data/University_Timelogs_2021-01-01_to_2021-12-31.csv',
    'ba': 'data/BA_Timelogs_2019-04-01_to_2020-03-31.csv',
}
source = {key: pd.read_csv(path) for key, path in _timelog_files.items()}
# Turn the textual date/time/duration columns into real datetime and
# timedelta columns ('Start', 'End', 'Duration') on every loaded frame.
for frame in source.values():
    frame['Start'] = pd.to_datetime(frame['Start date']) + pd.to_timedelta(frame['Start time'])
    frame['End'] = pd.to_datetime(frame['End date']) + pd.to_timedelta(frame['End time'])
    frame['Duration'] = pd.to_timedelta(frame['Duration'])
# Tag values that directly name an activity type
activity_types = ['Lecture', 'Exercise', 'Reading', 'Project', 'Study', 'Preparation']
# Derive the 'Lecture' and 'Type' columns from the tags
for key, frame in source.items():
    if key == 'ba':
        # The 'ba' log belongs to a single lecture and carries no type.
        frame['Lecture'] = 'BA Thesis'
        frame['Type'] = None
        continue
    frame['Lecture'] = None
    frame['Type'] = None
    # Rows whose tag is itself an activity type: the description is the
    # lecture name and the tag is the type.  All other rows stay None here.
    tagged = frame['Tags'].isin(activity_types)
    # Assign through .loc on the frame itself.  The former chained form
    # ``frame['Lecture'][i] = ...`` writes through an intermediate Series,
    # which pandas does not guarantee to propagate (and never does with
    # Copy-on-Write enabled).
    frame.loc[tagged, 'Lecture'] = frame.loc[tagged, 'Description'].str.strip()
    frame.loc[tagged, 'Type'] = frame.loc[tagged, 'Tags']
# Description postfixes and the activity type each one stands for.
# Order matters: the lookup loop stops at the first postfix a description
# ends with, so longer postfixes (e.g. 'Klausurvorbereitung') must come
# before the shorter ones they end with (e.g. 'Vorbereitung').
postfixes = dict([
    ('Projekt', 'Project'),
    ('Prüfungsvorbereitung', 'Study'),
    ('Klausurvorbereitung', 'Study'),
    ('Klausur', 'Study'),
    ('Prüfung', 'Study'),
    ('Übungen', 'Exercise'),
    ('Aufgabe', 'Exercise'),
    ('Einführung', 'Lecture'),
    ('Vorbereitung', 'Preparation'),
    ('Abschlussbericht', 'Exercise'),
    ('Bericht', 'Exercise'),
    ('Aufgaben', 'Exercise'),
    ('Dokumentation', 'Exercise'),
    ('Lernen', 'Study'),
    ('Essay', 'Exercise'),
    ('Homework', 'Exercise'),
    ('Präsentation', 'Exercise'),
])
def _split_postfix(description, postfix_types):
    """Split a log description into ``(lecture, activity type)``.

    The first key of *postfix_types* that *description* ends with (compared
    case-insensitively) is cut off the end; the stripped remainder is the
    lecture name and the mapped value is the activity type.  When no key
    matches, the whole stripped description is the lecture name and the
    type is ``'Lecture'``.
    """
    lowered = description.lower()
    for postfix, activity in postfix_types.items():
        if lowered.endswith(postfix.lower()):
            return description[:-len(postfix)].strip(), activity
    return description.strip(), 'Lecture'


# Convert postfixes to type and lecture
for k, df in source.items():
    # Only rows that were not already classified (Lecture still missing).
    for i in df.index[df['Lecture'].isnull()]:
        lecture, activity = _split_postfix(df.at[i, 'Description'], postfixes)
        # Write through .at on the frame itself.  The former chained form
        # ``source[k]['Lecture'][i] = ...`` assigns via an intermediate
        # Series and is not guaranteed to reach the frame (it never does
        # with Copy-on-Write enabled).
        df.at[i, 'Lecture'] = lecture
        df.at[i, 'Type'] = activity
    df['Type'] = pd.Categorical(df['Type'])
# Stack all logs; the dict keys become the outer index level.
combined_data = pd.concat(source)
combined_data
User | Client | Project | Task | Description | Billable | Start date | Start time | End date | End time | Duration | Tags | Amount () | Start | End | Lecture | Type | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2019 | 0 | Christoph Jabs | NaN | Hochschule | NaN | Akustik | No | 2019-01-09 | 16:30:13 | 2019-01-09 | 18:00:13 | 0 days 01:30:00 | NaN | NaN | 2019-01-09 16:30:13 | 2019-01-09 18:00:13 | Akustik | Lecture |
1 | Christoph Jabs | NaN | Hochschule | NaN | Microcontroller | No | 2019-01-10 | 10:45:17 | 2019-01-10 | 13:00:19 | 0 days 02:15:02 | Hochschule | NaN | 2019-01-10 10:45:17 | 2019-01-10 13:00:19 | Microcontroller | Lecture | |
2 | Christoph Jabs | NaN | Hochschule | NaN | Regelungstechnik Projekt | No | 2019-01-10 | 13:55:41 | 2019-01-10 | 14:39:30 | 0 days 00:43:49 | Hochschule | NaN | 2019-01-10 13:55:41 | 2019-01-10 14:39:30 | Regelungstechnik | Project | |
3 | Christoph Jabs | NaN | Hochschule | NaN | Technische Mechanik | No | 2019-01-10 | 14:45:19 | 2019-01-10 | 18:00:19 | 0 days 03:15:00 | Hochschule | NaN | 2019-01-10 14:45:19 | 2019-01-10 18:00:19 | Technische Mechanik | Lecture | |
4 | Christoph Jabs | NaN | Hochschule | NaN | Regelungstechnik Projekt | No | 2019-01-11 | 15:39:43 | 2019-01-11 | 19:13:06 | 0 days 03:33:23 | Hochschule | NaN | 2019-01-11 15:39:43 | 2019-01-11 19:13:06 | Regelungstechnik | Project | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
ba | 346 | Christoph Jabs | NaN | Bacherlorarbeit | NaN | Review: Overall | No | 2020-02-09 | 19:20:05 | 2020-02-09 | 19:31:54 | 0 days 00:11:49 | NaN | NaN | 2020-02-09 19:20:05 | 2020-02-09 19:31:54 | BA Thesis | NaN |
347 | Christoph Jabs | NaN | Bacherlorarbeit | NaN | Review: Overall | No | 2020-02-10 | 07:10:00 | 2020-02-10 | 07:34:00 | 0 days 00:24:00 | NaN | NaN | 2020-02-10 07:10:00 | 2020-02-10 07:34:00 | BA Thesis | NaN | |
348 | Christoph Jabs | NaN | Bacherlorarbeit | NaN | Review: Overall | No | 2020-02-10 | 08:04:14 | 2020-02-10 | 09:17:25 | 0 days 01:13:11 | NaN | NaN | 2020-02-10 08:04:14 | 2020-02-10 09:17:25 | BA Thesis | NaN | |
349 | Christoph Jabs | NaN | Bacherlorarbeit | NaN | Review: Overall | No | 2020-02-10 | 11:00:00 | 2020-02-10 | 12:50:00 | 0 days 01:50:00 | NaN | NaN | 2020-02-10 11:00:00 | 2020-02-10 12:50:00 | BA Thesis | NaN | |
350 | Christoph Jabs | NaN | Bacherlorarbeit | NaN | Review: Overall | No | 2020-02-10 | 13:55:57 | 2020-02-10 | 17:44:15 | 0 days 03:48:18 | NaN | NaN | 2020-02-10 13:55:57 | 2020-02-10 17:44:15 | BA Thesis | NaN |
1443 rows × 17 columns
# Merge misspelled or renamed lecture labels into one canonical name each.
lecture_aliases = {
    'Betriebs- und Kommunikationssysteme': 'Betriebssysteme und Echtzeit',
    'Buisness English': 'Business English',
    'Signale und Systeme II': 'Digitale Signalverarbeitung',
    'Seminar Leitfaden Abschlussarbeit': 'Leitfaden Abschlussarbeit',
}
for alias, canonical in lecture_aliases.items():
    # A single .loc call on combined_data is required here:
    # ``combined_data[mask]['Lecture'] = ...`` assigns into a temporary copy
    # of the selected rows and leaves combined_data unchanged.
    combined_data.loc[combined_data['Lecture'] == alias, 'Lecture'] = canonical
# Total logged duration per lecture
grouped_data = combined_data.groupby('Lecture')
grouped_data['Duration'].sum()
Lecture Akustik 0 days 16:07:56 Algorithms and Complexity 4 days 00:06:28 Approximation Algorithms 3 days 02:51:25 BA Thesis 13 days 20:59:44 BWL/Rechnungswesen 0 days 20:06:05 Betriebs- und Kommunikationssysteme 0 days 01:50:00 Betriebssysteme 0 days 00:50:32 Betriebssysteme und Echtzeit 1 days 04:27:13 Betriebssysteme und Echtzeit Praktikum 0 days 13:14:22 Building AI 0 days 04:26:13 Buisness English 1 days 00:41:40 Business English 0 days 03:40:33 CS Colloquium 0 days 12:20:42 Combinatorial Optimization 2 days 02:59:31 Data Analysis with Python 0 days 19:16:09 Design and Analysis of Algorithms 1 days 18:54:37 Digitale Signalverarbeitung 0 days 09:34:25 Elektrische Antriebe 1 days 06:03:16 Elektrische Antriebe Praktikum 0 days 21:23:57 Elements of AI 0 days 06:00:31 Finnish 1A 0 days 21:27:00 Finnish 1B 0 days 22:50:17 Finnish 2A 1 days 02:15:33 Introduction to Data Science 3 days 06:03:35 Introduction to the Programming of Quantum Computers 0 days 15:35:44 Kommunikationssysteme 1 days 02:23:43 Kommunikationssysteme Praktikum 0 days 10:40:34 Kreativer Systementwurf 1 days 19:02:09 Leistungselektronik 1 days 03:37:16 Leitfaden Abschlussarbeit 0 days 01:30:00 Microcontroller 0 days 09:55:16 Microcontroller Praktikum 0 days 03:15:00 Modellierung und Simulation heterogener Systeme 0 days 05:29:58 Network Analysis 1 days 15:12:49 Praktikumsbewerbung 0 days 00:42:55 Praxissemester 1 days 05:26:12 Präsentation Stipendium 0 days 06:39:21 Recht 0 days 19:42:00 Regelungstechnik 1 days 17:14:41 Regelungstechnik II 0 days 20:48:53 Regelungstechnik II Praktikum 0 days 19:45:18 Robotersysteme 2 days 10:30:34 Robotersysteme Praktikum 0 days 22:06:53 Seminar Leitfaden Abschlussarbeit 0 days 01:12:50 Signale und Systeme II 0 days 02:00:00 Software Engineering 0 days 06:37:13 Statistical Machine Learning 5 days 09:41:23 Steuerungstechnik 0 days 04:42:50 Stipendium 0 days 00:08:46 String Processing Algorithms 2 days 11:16:27 Studienbericht Studienstiftung 0 days 02:48:39 TOEFL 0 
days 06:51:59 Technische Mechanik 0 days 16:20:10 Theoretische Informatik 4 days 02:52:57 Name: Duration, dtype: timedelta64[ns]
# Per-lecture metadata, one row per lecture:
# (lecture name, ECTS, completed, university abbreviation)
_lecture_rows = [
    ('Akustik', 3, True, 'RT'),
    ('Algorithms and Complexity', 9, True, 'EKU'),
    ('Approximation Algorithms', 5, True, 'UH'),
    ('BA Thesis', 14, True, 'RT'),
    ('BWL/Rechnungswesen', 2, True, 'RT'),
    ('Betriebssysteme und Echtzeit', 3, True, 'RT'),
    ('Betriebssysteme und Echtzeit Praktikum', 1, True, 'RT'),
    ('Building AI', 2, True, 'UH'),
    ('Business English', 2, True, 'RT'),
    ('CS Colloquium', 5, False, 'UH'),
    ('Combinatorial Optimization', 5, True, 'UH'),
    ('Data Analysis with Python', 5, True, 'UH'),
    ('Design and Analysis of Algorithms', 5, True, 'UH'),
    ('Digitale Signalverarbeitung', 5, False, 'RT'),
    ('Elektrische Antriebe', 3, True, 'RT'),
    ('Elektrische Antriebe Praktikum', 2, True, 'RT'),
    ('Elements of AI', 2, True, 'UH'),
    ('Finnish 1A', 3, True, 'UH'),
    ('Finnish 1B', 2, True, 'UH'),
    ('Finnish 2A', 3, True, 'UH'),
    ('Finnish 2B', 2, False, 'UH'),
    ('Introduction to Data Science', 5, True, 'UH'),
    ('Introduction to the Programming of Quantum Computers', 5, False, 'UH'),
    ('Kommunikationssysteme', 3, True, 'RT'),
    ('Kommunikationssysteme Praktikum', 1, True, 'RT'),
    ('Kreativer Systementwurf', 3, True, 'RT'),
    ('Leistungselektronik', 3, True, 'RT'),
    ('Leitfaden Abschlussarbeit', 0, False, 'RT'),
    ('Microcontroller', 5, False, 'RT'),
    ('Microcontroller Praktikum', 2, False, 'RT'),
    ('Modellierung und Simulation heterogener Systeme', 4, False, 'RT'),
    ('Network Analysis', 5, True, 'UH'),
    ('Praktikumsbewerbung', 0, False, 'RT'),
    ('Praxissemester', 0, False, 'RT'),
    ('Präsentation Stipendium', 0, False, 'Studienstiftung'),
    ('Recht', 2, True, 'RT'),
    ('Regelungstechnik', 3, False, 'RT'),
    ('Regelungstechnik II', 3, True, 'RT'),
    ('Regelungstechnik II Praktikum', 3, True, 'RT'),
    ('Robotersysteme', 5, True, 'RT'),
    ('Robotersysteme Praktikum', 3, True, 'RT'),
    ('Software Engineering', 3, False, 'RT'),
    ('Statistical Machine Learning', 9, True, 'EKU'),
    ('Steuerungstechnik', 5, False, 'RT'),
    ('Stipendium', 0, False, 'Studienstiftung'),
    ('String Processing Algorithms', 0, False, 'UH'),
    ('Studienbericht Studienstiftung', 0, False, 'Studienstiftung'),
    ('TOEFL', 0, False, 'UH'),
    ('Technische Mechanik', 4, False, 'UH'),
    ('Theoretische Informatik', 9, True, 'EKU'),
]
# Build the frame with the lecture name as index and one column per field.
lectures = pd.DataFrame.from_dict(
    {
        name: {'ECTS': ects, 'completed': done, 'University': university}
        for name, ects, done, university in _lecture_rows
    },
    orient='index',
)
# Lectures that were taken as online courses
online_courses = [
    'Elements of AI',
    'Building AI',
    'Data Analysis with Python',
]
# Boolean flag column, computed in one step.  The former chained form
# ``lectures['online course'][mask] = True`` writes through an intermediate
# Series and is not guaranteed to reach the frame.
lectures['online course'] = lectures.index.isin(online_courses)
# Keep the lecture name both as (named) index and as a regular column.
lectures.index = lectures.index.set_names('Lecture')
lectures['Lecture'] = lectures.index
# Rename universities: replace the abbreviations by the names used for display.
university_names = {
    'RT': 'Reutlingen',
    'EKU': 'Tübingen',
    'UH': 'Helsinki',
}
for abbreviation, full_name in university_names.items():
    lectures.loc[lectures['University'] == abbreviation, 'University'] = full_name
# Workload in hours that one ECTS credit is counted as
hours_per_ects = 25
# Logged time per lecture, aligned on the lecture-name index; lectures
# without any logged time get a missing value.
lectures['Time effort'] = grouped_data['Duration'].sum()
lectures['Required time'] = pd.to_timedelta(lectures['ECTS'] * hours_per_ects, unit='h')
lectures['Time fraction'] = lectures['Time effort'] / lectures['Required time']
# A lecture with 0 ECTS has zero required time; treat an infinite ratio as missing.
lectures['Time fraction'] = lectures['Time fraction'].replace([np.inf, -np.inf], np.nan)
# Drop unfinished lectures.  Take an explicit copy so that columns added to
# the filtered frame afterwards do not trigger SettingWithCopyWarning.
lectures = lectures[lectures['completed']].copy()
# Convert time to string for display
def conv(t):
    """Format a duration as ``'<H> hours <M> minutes'``.

    Args:
        t: An object with a ``total_seconds()`` method (``datetime.timedelta``
            or ``pandas.Timedelta``), or a missing value (``NaT`` / ``None``).

    Returns:
        The whole hours and the remaining whole minutes as text; seconds are
        dropped, not rounded.  ``'n/a'`` for a missing value, which would
        otherwise fail on ``int(nan)``.
    """
    if pd.isna(t):
        return 'n/a'
    hours, remainder = divmod(t.total_seconds(), 3600)
    minutes = remainder // 60
    return f'{int(hours)} hours {int(minutes)} minutes'
# Human-readable text version of the logged time, for display
lectures['Time spent'] = lectures['Time effort'].map(conv)
lectures
ECTS | completed | University | online course | Lecture | Time effort | Required time | Time fraction | Time spent | |
---|---|---|---|---|---|---|---|---|---|
Lecture | |||||||||
Akustik | 3 | True | Reutlingen | False | Akustik | 0 days 16:07:56 | 3 days 03:00:00 | 0.215096 | 16 hours 7 minutes |
Algorithms and Complexity | 9 | True | Tübingen | False | Algorithms and Complexity | 4 days 00:06:28 | 9 days 09:00:00 | 0.427146 | 96 hours 6 minutes |
Approximation Algorithms | 5 | True | Helsinki | False | Approximation Algorithms | 3 days 02:51:25 | 5 days 05:00:00 | 0.598856 | 74 hours 51 minutes |
BA Thesis | 14 | True | Reutlingen | False | BA Thesis | 13 days 20:59:44 | 14 days 14:00:00 | 0.951416 | 332 hours 59 minutes |
BWL/Rechnungswesen | 2 | True | Reutlingen | False | BWL/Rechnungswesen | 0 days 20:06:05 | 2 days 02:00:00 | 0.402028 | 20 hours 6 minutes |
Betriebssysteme und Echtzeit | 3 | True | Reutlingen | False | Betriebssysteme und Echtzeit | 1 days 04:27:13 | 3 days 03:00:00 | 0.379381 | 28 hours 27 minutes |
Betriebssysteme und Echtzeit Praktikum | 1 | True | Reutlingen | False | Betriebssysteme und Echtzeit Praktikum | 0 days 13:14:22 | 1 days 01:00:00 | 0.529578 | 13 hours 14 minutes |
Building AI | 2 | True | Helsinki | True | Building AI | 0 days 04:26:13 | 2 days 02:00:00 | 0.088739 | 4 hours 26 minutes |
Business English | 2 | True | Reutlingen | False | Business English | 0 days 03:40:33 | 2 days 02:00:00 | 0.073517 | 3 hours 40 minutes |
Combinatorial Optimization | 5 | True | Helsinki | False | Combinatorial Optimization | 2 days 02:59:31 | 5 days 05:00:00 | 0.407936 | 50 hours 59 minutes |
Data Analysis with Python | 5 | True | Helsinki | True | Data Analysis with Python | 0 days 19:16:09 | 5 days 05:00:00 | 0.154153 | 19 hours 16 minutes |
Design and Analysis of Algorithms | 5 | True | Helsinki | False | Design and Analysis of Algorithms | 1 days 18:54:37 | 5 days 05:00:00 | 0.343282 | 42 hours 54 minutes |
Elektrische Antriebe | 3 | True | Reutlingen | False | Elektrische Antriebe | 1 days 06:03:16 | 3 days 03:00:00 | 0.400726 | 30 hours 3 minutes |
Elektrische Antriebe Praktikum | 2 | True | Reutlingen | False | Elektrische Antriebe Praktikum | 0 days 21:23:57 | 2 days 02:00:00 | 0.427983 | 21 hours 23 minutes |
Elements of AI | 2 | True | Helsinki | True | Elements of AI | 0 days 06:00:31 | 2 days 02:00:00 | 0.120172 | 6 hours 0 minutes |
Finnish 1A | 3 | True | Helsinki | False | Finnish 1A | 0 days 21:27:00 | 3 days 03:00:00 | 0.286000 | 21 hours 27 minutes |
Finnish 1B | 2 | True | Helsinki | False | Finnish 1B | 0 days 22:50:17 | 2 days 02:00:00 | 0.456761 | 22 hours 50 minutes |
Finnish 2A | 3 | True | Helsinki | False | Finnish 2A | 1 days 02:15:33 | 3 days 03:00:00 | 0.350122 | 26 hours 15 minutes |
Introduction to Data Science | 5 | True | Helsinki | False | Introduction to Data Science | 3 days 06:03:35 | 5 days 05:00:00 | 0.624478 | 78 hours 3 minutes |
Kommunikationssysteme | 3 | True | Reutlingen | False | Kommunikationssysteme | 1 days 02:23:43 | 3 days 03:00:00 | 0.351937 | 26 hours 23 minutes |
Kommunikationssysteme Praktikum | 1 | True | Reutlingen | False | Kommunikationssysteme Praktikum | 0 days 10:40:34 | 1 days 01:00:00 | 0.427044 | 10 hours 40 minutes |
Kreativer Systementwurf | 3 | True | Reutlingen | False | Kreativer Systementwurf | 1 days 19:02:09 | 3 days 03:00:00 | 0.573811 | 43 hours 2 minutes |
Leistungselektronik | 3 | True | Reutlingen | False | Leistungselektronik | 1 days 03:37:16 | 3 days 03:00:00 | 0.368281 | 27 hours 37 minutes |
Network Analysis | 5 | True | Helsinki | False | Network Analysis | 1 days 15:12:49 | 5 days 05:00:00 | 0.313709 | 39 hours 12 minutes |
Recht | 2 | True | Reutlingen | False | Recht | 0 days 19:42:00 | 2 days 02:00:00 | 0.394000 | 19 hours 42 minutes |
Regelungstechnik II | 3 | True | Reutlingen | False | Regelungstechnik II | 0 days 20:48:53 | 3 days 03:00:00 | 0.277530 | 20 hours 48 minutes |
Regelungstechnik II Praktikum | 3 | True | Reutlingen | False | Regelungstechnik II Praktikum | 0 days 19:45:18 | 3 days 03:00:00 | 0.263400 | 19 hours 45 minutes |
Robotersysteme | 5 | True | Reutlingen | False | Robotersysteme | 2 days 10:30:34 | 5 days 05:00:00 | 0.468076 | 58 hours 30 minutes |
Robotersysteme Praktikum | 3 | True | Reutlingen | False | Robotersysteme Praktikum | 0 days 22:06:53 | 3 days 03:00:00 | 0.294863 | 22 hours 6 minutes |
Statistical Machine Learning | 9 | True | Tübingen | False | Statistical Machine Learning | 5 days 09:41:23 | 9 days 09:00:00 | 0.576399 | 129 hours 41 minutes |
Theoretische Informatik | 9 | True | Tübingen | False | Theoretische Informatik | 4 days 02:52:57 | 9 days 09:00:00 | 0.439478 | 98 hours 52 minutes |
# Mean time fraction over all remaining (completed) lectures
lectures.loc[:, 'Time fraction'].mean()
0.386641850145076
# Mean time fraction per university
lectures['Time fraction'].groupby(lectures['University']).mean()
University Helsinki 0.340383 Reutlingen 0.399922 Tübingen 0.481007 Name: Time fraction, dtype: float64
# Mean time fraction per university, split into online and regular courses
lectures['Time fraction'].groupby([lectures['University'], lectures['online course']]).mean()
University online course Helsinki False 0.422643 True 0.121021 Reutlingen False 0.399922 Tübingen False 0.481007 Name: Time fraction, dtype: float64
sns.set_theme(style='ticks', palette='pastel')
# Box plot of the time-fraction distribution, one box per university
sns.boxplot(data=lectures, x='University', y='Time fraction')
sns.despine(offset=10, trim=True)
plt.show()
sns.set_theme(style='whitegrid')
# Start a fresh figure and hide its bottom and left spines
f, ax = plt.subplots()
sns.despine(bottom=True, left=True)
# One semi-transparent point per lecture
sns.stripplot(
    data=lectures,
    x='University',
    y='Time fraction',
    dodge=True,
    alpha=.4,
    zorder=1,
)
# Overlay the mean per university as an unconnected diamond marker
# NOTE(review): `join` and `ci` are presumably deprecated in newer seaborn
# releases — confirm against the installed version before upgrading.
sns.pointplot(
    data=lectures,
    x='University',
    y='Time fraction',
    dodge=.532,
    join=False,
    palette="dark",
    markers='d',
    ci=None,
)
plt.show()
# Interactive box plot per university; every lecture is drawn as a point
# and shows its name, ECTS and time spent on hover.
fig = px.box(
    lectures,
    x='University',
    y='Time fraction',
    color='University',
    points='all',
    hover_name='Lecture',
    hover_data=['ECTS', 'Time spent'],
    title='Time fraction distribution per University',
)
# The colour only repeats the x axis, so the legend is hidden.
fig.update(layout_showlegend=False)
fig.show(renderer='notebook')
# Same interactive box plot, coloured by whether the lecture was an online course
fig = px.box(
    lectures,
    x='University',
    y='Time fraction',
    color='online course',
    points='all',
    hover_name='Lecture',
    hover_data=['ECTS', 'Time spent'],
    title='Time fraction distribution per University',
)
fig.show(renderer='notebook')