diff --git a/src/twomartens/allrisscraper/agenda.py b/src/twomartens/allrisscraper/agenda.py
new file mode 100644
index 0000000..a303c9d
--- /dev/null
+++ b/src/twomartens/allrisscraper/agenda.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Jim Martens
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dataclasses import dataclass
+from typing import List, Dict
+
+
+@dataclass
+class Motion:
+    id: str
+    title: str
+    text: str
+
+
+@dataclass
+class Agenda:
+    noticesOfChair: Dict[str, Motion]
+    noticesOfAdministration: Dict[str, Motion]
+    motions: Dict[str, Motion]
diff --git a/src/twomartens/allrisscraper/main.py b/src/twomartens/allrisscraper/main.py
index d89e606..5f0784f 100644
--- a/src/twomartens/allrisscraper/main.py
+++ b/src/twomartens/allrisscraper/main.py
@@ -76,12 +76,14 @@ def main() -> None:
     options.headless = True
     binary = FirefoxBinary(firefox_binary)
     driver = webdriver.Firefox(firefox_binary=binary, options=options)
+    driver.delete_all_cookies()
     driver.implicitly_wait(2)
     driver.get(ALLRIS_LOGIN)
     login(driver, username=username, password=password)
     driver.get("https://serviceportal.hamburg.de/HamburgGateway/Service/StartService/ALLMAnd")
     driver.get(f"{base_url}/si012.asp")
     meetings = get_meetings(driver)
+    fill_agendas_committees(driver, meetings)
     download_documents(driver, meetings, pdf_location, base_url, district)
     driver.close()
 
@@ -106,9 +108,24 @@ def get_meetings(driver: webdriver.Firefox) -> List[meeting.Meeting]:
         agenda_link = tds[4].find_element_by_tag_name("a").get_property("href")
         name = tds[4].find_element_by_tag_name("a").text
         location = tds[5].text
-        meetings.append(meeting.Meeting(name, date_obj, time_obj, agenda_link, location))
+        meetings.append(meeting.Meeting(name, date_obj, time_obj, agenda_link, location, None))
 
     return meetings
+
+
+def fill_agendas_committees(driver: webdriver.Firefox, meetings: List[meeting.Meeting]) -> None:
+    notices_of_chair = "Mitteilungen der/des Vorsitzenden"
+    notices_of_administration = "Mitteilungen der Verwaltung"
+    motions = "Anträge / Vorlagen der Verwaltung"
+    for _meeting in meetings:
+        driver.get(_meeting.link)
+        td = driver.find_element(By.XPATH, "//td[text()='" + notices_of_chair + "']")
+        topChair = td.find_element(By.XPATH, '..').find_element(By.CSS_SELECTOR, 'td:first-child').find_element_by_tag_name("a").text
+        td = driver.find_element(By.XPATH, "//td[text()='" + notices_of_administration + "']")
+        topAdmin = td.find_element(By.XPATH, '..').find_element(By.CSS_SELECTOR, 'td:first-child').find_element_by_tag_name("a").text
+        td = driver.find_element(By.XPATH, "//td[text()='" + motions + "']")
+        topMotions = td.find_element(By.XPATH, '..').find_element(By.CSS_SELECTOR, 'td:first-child').find_element_by_tag_name("a").text
+        pass
 
 
 def download_documents(driver: webdriver.Firefox, meetings: List[meeting.Meeting],
diff --git a/src/twomartens/allrisscraper/meeting.py b/src/twomartens/allrisscraper/meeting.py
index 21e5fec..f72100d 100644
--- a/src/twomartens/allrisscraper/meeting.py
+++ b/src/twomartens/allrisscraper/meeting.py
@@ -15,6 +15,9 @@
 # limitations under the License.
 import datetime
 from dataclasses import dataclass
+from typing import Optional
+
+from twomartens.allrisscraper.agenda import Agenda
 
 
 @dataclass
@@ -24,3 +27,4 @@ class Meeting:
     time: datetime.time
     link: str
     location: str
+    agenda: Optional[Agenda]