Talk Topic Assignment
In [ ]:
import glob
import json
import os
import numpy as np
In [ ]:
# Maps a cleaned-up citation key to the key it should be filed under.
# A value of None marks a key that is to be discarded instead of renamed.
NORMALIZATION_MAP = {
    # A book
    "balaji2015": None,
    "jaaskelainen2016": "jaaskelainen2015",
    "bezanson2015": "bezanson2017",
    "frigo1999": None,
    "trott2014": "edwards2014",
}

# (old, new) substitutions applied, in this order, to the lowercased input.
_REPLACEMENTS = (
    (" ", ""),
    ("(", ""),
    (")", ""),
    (",", ""),
    ("ä", "a"),
    ("-", ""),
    # common misspelling of the author name
    ("jasskel", "jaaskel"),
)


def normalize_paper(s):
    """Return the canonical key for the free-form paper reference *s*.

    The reference is lowercased; spaces, parentheses, commas and hyphens
    are removed; "ä" becomes "a"; and the misspelling "jasskel" is
    repaired.  The cleaned key is then looked up in ``NORMALIZATION_MAP``:
    if present, the mapped value (which may be ``None``) is returned,
    otherwise the cleaned key itself.
    """
    key = s.lower()
    for old, new in _REPLACEMENTS:
        key = key.replace(old, new)
    return NORMALIZATION_MAP.get(key, key)
Read and normalize data:
In [ ]:
def submitter_name(fname):
    """Return the text before the first "@" in the base name of *fname*.

    Returns ``None`` when the base name contains no "@".  Only the base
    name is inspected, so an "@" in a directory component of the path
    neither produces a bogus name nor makes an unrelated file look like a
    submission.
    """
    stem, at_sign, _ = os.path.basename(fname).partition("@")
    return stem if at_sign else None


# Load every submission.  glob returns files in a filesystem-dependent
# order, so sort to make the run reproducible.  If two files share the same
# name prefix, the one that sorts last wins.
raw_prefs = {}
for fname in sorted(glob.glob("../../../grade/talk-assignment/*.json")):
    name = submitter_name(fname)
    if name is None:
        continue
    with open(fname, "rb") as inf:
        raw_prefs[name] = json.load(inf)

# Single-element list so that unique_other() can update the counter in place.
other_count = [0]


def unique_other(s):
    """Turn each occurrence of "other" into a distinct "otherN" placeholder.

    Any other value (including ``None``) is returned unchanged.  Each call
    with "other" increments ``other_count[0]`` and uses the new value as N.
    """
    if s == "other":
        other_count[0] += 1
        return "other%d" % other_count[0]
    return s


# Build the final mapping in sorted-name order: the dict keeps that order,
# and the "otherN" numbers are handed out deterministically.
all_prefs = {}
for name in sorted(raw_prefs):
    # Each submission must provide the keys pref1 .. pref4, most wanted first.
    ranked = [raw_prefs[name]["pref%d" % i] for i in range(1, 5)]
    normalized = [unique_other(normalize_paper(s)) for s in ranked]
    # normalize_paper() yields None for entries that are to be dropped.
    all_prefs[name] = [s for s in normalized if s is not None]
Build cost matrix:
In [ ]:
# Cost of giving a person a paper that is not on their preference list.
UNLISTED_COST = 100

# Every paper that anyone listed, in a stable (sorted) column order.
papers = sorted({paper for prefs in all_prefs.values() for paper in prefs})
# Row order follows the iteration order of all_prefs.
names = list(all_prefs)

# Direct paper -> column lookup instead of a list search per preference.
paper_column = {paper: col for col, paper in enumerate(papers)}

# cost_matrix[row, col] is the 0-based rank of papers[col] in the preference
# list of names[row], or UNLISTED_COST if that person did not list it.
cost_matrix = np.full((len(names), len(papers)), UNLISTED_COST, dtype=float)
for row, name in enumerate(names):
    for rank, paper in enumerate(all_prefs[name]):
        col = paper_column[paper]
        # A paper may occur more than once in one list (e.g. two spellings
        # that normalize to the same key); keep its best (lowest) rank
        # rather than letting a later occurrence overwrite it.
        cost_matrix[row, col] = min(cost_matrix[row, col], rank)
Compute solution:
In [ ]:
from scipy.optimize import linear_sum_assignment

# Minimum-total-cost matching of people (rows) to papers (columns).
name_indices, paper_indices = linear_sum_assignment(cost_matrix)

for ni, pi in zip(name_indices, paper_indices):
    print(f"{names[ni]}: {papers[pi]} -> cost: {cost_matrix[ni, pi]}")

# With more rows than columns the solver leaves some rows unmatched; list
# those people explicitly instead of silently omitting them.
assigned_rows = set(name_indices.tolist())
unassigned = [name for row, name in enumerate(names) if row not in assigned_rows]
if unassigned:
    print()
    print(f"Unassigned (more people than papers): {', '.join(unassigned)}")

print()
print(f"Total cost: {cost_matrix[name_indices, paper_indices].sum()}")
In [ ]: