Talk Topic Assignment

In [ ]:
import glob
import json
import os
import unicodedata

import numpy as np
from scipy.optimize import linear_sum_assignment
In [ ]:
# Special cases applied by normalize_paper after a citation key has been
# cleaned up: a string value is the key to use instead, while None marks an
# entry that is later filtered out of the preference lists.
NORMALIZATION_MAP = {
    "balaji2015": None,  # A book
    "jaaskelainen2016": "jaaskelainen2015",
    "bezanson2015": "bezanson2017",
    "frigo1999": None,
    "trott2014": "edwards2014",
}


def normalize_paper(s):
    """Reduce a free-form paper reference to a canonical citation key.

    The text is lower-cased, accents are removed, all whitespace and the
    characters ``( ) , -`` are dropped, and the misspelling "jasskel" is
    corrected to "jaaskel". The result is then looked up in
    NORMALIZATION_MAP, which may substitute another key or None.

    :arg s: the reference as entered, or None.
    :returns: the canonical key, or None if *s* is None, contains nothing
        but removable characters, or is mapped to None by NORMALIZATION_MAP.
    """
    if s is None:
        return None

    # Decompose accented letters and drop the combining marks, so that both
    # the precomposed "ä" and "a" followed by U+0308 end up as plain "a".
    decomposed = unicodedata.normalize("NFKD", s)
    s = "".join(
            ch for ch in decomposed if not unicodedata.combining(ch)
            ).lower()

    # Remove every kind of whitespace (not just " ") plus separator
    # punctuation.
    s = "".join(ch for ch in s if not ch.isspace() and ch not in "(),-")

    # Common misspelling of "Jääskeläinen".
    s = s.replace("jasskel", "jaaskel")

    if not s:
        # A blank entry is not a paper; do not let "" become a key.
        return None

    return NORMALIZATION_MAP.get(s, s)

Read and normalize data:

In [ ]:
def name_from_path(fname):
    """Return the part of the file's base name that precedes the first "@".

    Only the base name is inspected, so an "@" in a directory component
    cannot be mistaken for the separator. Returns None if the base name
    contains no "@".
    """
    name, at_sign, _ = os.path.basename(fname).partition("@")
    return name if at_sign else None


# Maps each name (taken from the file name) to the list of the four entries
# "pref1" .. "pref4" read from that file, in that order.
all_prefs = {}

# sorted() makes the load order deterministic, and with it which file wins
# if two files yield the same name.
for fname in sorted(glob.glob("../../../grade/talk-assignment/*.json")):
    name = name_from_path(fname)
    if name is None:
        # Files without an "@" in their name are skipped.
        continue

    with open(fname, "rb") as inf:
        submission = json.load(inf)

    all_prefs[name] = [submission["pref%d" % i] for i in range(1, 5)]

# Number of "other" entries numbered so far. Held in a one-element list so
# that unique_other can update it in place.
other_count = [0]


def unique_other(s):
    """Give every "other" entry its own name.

    Anything that is not exactly "other" (including None) is returned
    unchanged. Each "other" bumps the counter in other_count and comes back
    as "other1", "other2", ... so that no two of them compare equal.
    """
    if s != "other":
        return s

    other_count[0] = other_count[0] + 1
    return "other%d" % other_count[0]

# Canonicalize every entry, number the "other" entries, and discard entries
# that came back as None. Names and entries are visited in their existing
# order, so the numbering handed out by unique_other is unaffected.
all_prefs = {
        name: [
            paper
            for paper in (
                unique_other(normalize_paper(raw)) for raw in raw_prefs)
            if paper is not None]
        for name, raw_prefs in all_prefs.items()}

# Rebuild the dict with its keys in sorted order (dicts keep insertion order).
all_prefs = dict(sorted(all_prefs.items(), key=lambda item: item[0]))

Build cost matrix:

In [ ]:
# Cost of pairing a name with a paper that is not on that name's list;
# listed papers cost their 0-based rank instead, which is far smaller.
UNLISTED_COST = 100

# Column order: every paper that appears in anyone's list, sorted.
papers = sorted({paper for prefs in all_prefs.values() for paper in prefs})
# Row order: names in the iteration order of all_prefs.
names = list(all_prefs)

# Look columns up in a dict rather than with papers.index() in the loop.
paper_column = {paper: col for col, paper in enumerate(papers)}

# dtype=float keeps the matrix floating-point, as np.empty() produced.
cost_matrix = np.full((len(names), len(papers)), UNLISTED_COST, dtype=float)

for row, name in enumerate(names):
    # dict.fromkeys() drops repeated entries while keeping first-occurrence
    # order, so a paper listed twice keeps its best rank instead of being
    # overwritten by the later, worse one.
    for rank, paper in enumerate(dict.fromkeys(all_prefs[name])):
        cost_matrix[row, paper_column[paper]] = rank

Compute solution:

In [ ]:
# Pick the pairing of rows (names) and columns (papers) that minimizes the
# summed cost.
name_indices, paper_indices = linear_sum_assignment(cost_matrix)
for ni, pi in zip(name_indices, paper_indices):
    print(f"{names[ni]}: {papers[pi]} -> cost: {cost_matrix[ni, pi]}")

# When the matrix has more rows than columns, the solver leaves some rows
# without a column. List those names explicitly so they are not simply
# missing from the report.
assigned_rows = set(name_indices.tolist())
unassigned = [name for row, name in enumerate(names) if row not in assigned_rows]
if unassigned:
    print()
    print("No paper assigned to: " + ", ".join(unassigned))

print()
print(f"Total cost: {cost_matrix[name_indices, paper_indices].sum()}")
In [ ]: