Verified Commit 9ba087a1 authored by Jakob Moser's avatar Jakob Moser
Browse files

Add basic budget plan downloader

parent ac335533
Loading
Loading
Loading
Loading
+0 −0

Empty file added.

+68 −0
Original line number Diff line number Diff line
import requests
import re
from bs4 import BeautifulSoup
from pathlib import Path

from ..schwalbe.studienfachschaften import get_studienfachschaften


def parse_common_budget_plan_element(element):
    """Split one budget plan list entry into its name and its plan links.

    Args:
        element: A parsed HTML element (the caller passes ``<li>`` tags)
            exposing ``text`` and ``find_all``. The first line of its text
            is taken as the Studienfachschaft name.

    Returns:
        A ``(name, links)`` tuple. ``name`` is the stripped first line of
        the element's text; ``links`` maps the text of every ``<a>`` inside
        the element to that anchor's ``href`` attribute.
    """
    # Everything before the first line break is the name.
    first_line, _, _ = element.text.partition("\n")

    links_by_label = {}
    for anchor in element.find_all("a"):
        links_by_label[anchor.text] = anchor.attrs["href"]

    return first_line.strip(), links_by_label


def is_common_budget_plan_element(element):
    """Tell whether a list entry has the common one-line-break layout.

    Args:
        element: An object exposing its text content as ``text``.

    Returns:
        ``True`` only when the element's text contains exactly one newline
        character, ``False`` otherwise.
    """
    newline_count = element.text.count("\n")
    return newline_count == 1


def get_budget_plan_links():
    """Scrape the StuRa budget page for the budget plan links.

    Fetches the page, locates the first element whose text starts with
    "Budgetpläne der Fachschaften", and reads the ``<li>`` entries of the
    first ``<ul>`` inside it. Entries that do not pass
    ``is_common_budget_plan_element`` are skipped.

    Returns:
        A dict mapping each Studienfachschaft name to a dict of
        ``{link text: href}`` as produced by
        ``parse_common_budget_plan_element``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        LookupError: If the budget plans box or its list cannot be found
            in the page.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    page = requests.get(
        "https://www.stura.uni-heidelberg.de/finanzen/haushalt/", timeout=30
    )
    # Fail here rather than trying to parse an HTTP error page.
    page.raise_for_status()
    soup = BeautifulSoup(page.text, "html.parser")

    def is_budget_plans_box(element):
        return element.text.strip().startswith("Budgetpläne der Fachschaften")

    budget_plans_box = soup.find(is_budget_plans_box)
    budget_plan_list = (
        budget_plans_box.find("ul") if budget_plans_box is not None else None
    )
    if budget_plan_list is None:
        # find() returns None when nothing matches; report that explicitly
        # instead of failing with an AttributeError on None.
        raise LookupError("Could not find the budget plans list on the page")

    budget_plan_elements = budget_plan_list.find_all("li")

    return dict(
        parse_common_budget_plan_element(element)
        for element in budget_plan_elements
        if is_common_budget_plan_element(element)
    )


def get_budget_plan_links_by_studienfachschaft_uuid():
    """Re-key the scraped budget plan links by Studienfachschaft UUID.

    Each name returned by ``get_budget_plan_links`` is compared (exact
    string equality) with the ``name`` of the objects returned by
    ``get_studienfachschaften``; the first object that matches wins. Names
    without a match are printed and left out of the result.

    Returns:
        A dict mapping the ``uuid`` of every matched Studienfachschaft to
        its budget plan links.
    """
    known_studienfachschaften = get_studienfachschaften()
    links_by_name = get_budget_plan_links()

    links_by_uuid = {}

    for name, plan_links in links_by_name.items():
        # Linear search that stops at the first object with this name.
        match = next(
            (
                candidate
                for candidate in known_studienfachschaften
                if candidate.name == name
            ),
            None,
        )

        if match is None:
            print(name)  # TODO Handle error
            continue

        links_by_uuid[match.uuid] = plan_links

    return links_by_uuid


def download_all_budget_plans():
    """Download every linked budget plan below ``instance/budget_plans``.

    For each Studienfachschaft UUID returned by
    ``get_budget_plan_links_by_studienfachschaft_uuid`` a directory
    ``instance/budget_plans/<uuid>`` is created (if missing), and each of
    its links is saved there as ``<link text>.pdf``. Path separators in the
    link text are replaced by ``_``.

    Raises:
        requests.RequestException: If a download fails, times out, or the
            server answers with an HTTP error status.
        OSError: If a directory or file cannot be created or written.
    """
    links = get_budget_plan_links_by_studienfachschaft_uuid()
    base_dir = Path("instance/budget_plans")

    for studienfachschaft_uuid, budget_plan_links in links.items():
        studienfachschaft_dir = base_dir / str(studienfachschaft_uuid)
        studienfachschaft_dir.mkdir(exist_ok=True, parents=True)

        for year, link in budget_plan_links.items():
            # The label is scraped link text: a path separator in it would
            # point into a non-existent subdirectory or outside the target
            # directory, so neutralise separators before building the path.
            file_name = f"{year}.pdf".replace("/", "_").replace("\\", "_")

            # The context manager releases the streamed connection; the
            # timeout keeps an unresponsive server from blocking forever.
            with requests.get(link, stream=True, timeout=30) as response:
                # Do not store an HTTP error page as if it were the PDF.
                response.raise_for_status()
                with open(studienfachschaft_dir / file_name, "wb") as f:
                    # iter_content() defaults to 1-byte chunks; use a real
                    # buffer size so the file is not written byte by byte.
                    for chunk in response.iter_content(chunk_size=64 * 1024):
                        f.write(chunk)