Parse consultations and store with motion

This commit is contained in:
2020-07-05 17:08:12 +02:00
parent 6ebce4f03f
commit 7424492c3a
2 changed files with 30 additions and 9 deletions

View File

@@ -21,6 +21,8 @@ from typing import List
@dataclass @dataclass
class Consultation: class Consultation:
authoritative: bool authoritative: bool
meeting: str
organization: List[str]
role: str role: str
@@ -29,9 +31,10 @@ class Motion:
name: str name: str
reference: str reference: str
type: str type: str
underDirectionOf: str under_direction_of: str
context: str context: str
petition: str petition: str
consultations: List[Consultation]
@dataclass @dataclass
@@ -47,4 +50,4 @@ class AgendaItem:
@dataclass @dataclass
class Agenda: class Agenda:
agendaItems: List[AgendaItem] agenda_items: List[AgendaItem]

View File

@@ -1,11 +1,11 @@
import configparser import configparser
import dataclasses
import json import json
import os import os
from datetime import date from datetime import date
from datetime import time from datetime import time
from typing import Dict from typing import Dict
from typing import List from typing import List
from typing import Optional
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
@@ -16,8 +16,8 @@ from selenium.webdriver.remote.webelement import WebElement
from twomartens.allrisscraper import agenda from twomartens.allrisscraper import agenda
from twomartens.allrisscraper import config as config_module from twomartens.allrisscraper import config as config_module
from twomartens.allrisscraper import definitions
from twomartens.allrisscraper import custom_json from twomartens.allrisscraper import custom_json
from twomartens.allrisscraper import definitions
from twomartens.allrisscraper import meeting from twomartens.allrisscraper import meeting
from twomartens.allrisscraper.definitions import MONTHS from twomartens.allrisscraper.definitions import MONTHS
from twomartens.allrisscraper.meeting import Meeting from twomartens.allrisscraper.meeting import Meeting
@@ -140,7 +140,7 @@ def process_agenda_item(index: int, item: WebElement) -> agenda.AgendaItem:
def get_motions(driver: webdriver.Firefox, meetings: List[meeting.Meeting]) -> Dict[str, agenda.Motion]: def get_motions(driver: webdriver.Firefox, meetings: List[meeting.Meeting]) -> Dict[str, agenda.Motion]:
motions: Dict[str, agenda.Motion] = dict() motions: Dict[str, agenda.Motion] = dict()
for _meeting in meetings: for _meeting in meetings:
agenda_items = _meeting.agenda.agendaItems agenda_items = _meeting.agenda.agenda_items
for agenda_item in agenda_items: for agenda_item in agenda_items:
if agenda_item.motion_link is None: if agenda_item.motion_link is None:
continue continue
@@ -156,6 +156,24 @@ def get_motion(driver: webdriver.Firefox, link: str, reference: str) -> agenda.M
name = str(meta_trs[0].find_element(By.XPATH, "td[2]").text).strip() name = str(meta_trs[0].find_element(By.XPATH, "td[2]").text).strip()
motion_type = str(meta_trs[1].find_element(By.XPATH, "td[4]").text).strip() motion_type = str(meta_trs[1].find_element(By.XPATH, "td[4]").text).strip()
under_direction_of = str(meta_trs[2].find_element(By.XPATH, "td[2]").text).strip() under_direction_of = str(meta_trs[2].find_element(By.XPATH, "td[2]").text).strip()
consultation_trs = meta_trs[4].find_elements(
By.XPATH,
".//table//tr")[1:]
current_organization: Optional[str] = None
current_role: Optional[str] = None
consultations = []
for consultation_tr in consultation_trs:
tds = consultation_tr.find_elements_by_xpath("td")
is_organization_header = tds[1].get_attribute("class") == "text1"
if is_organization_header:
current_organization = str(tds[1].text).strip()
current_role = str(tds[2].text).strip()
else:
authoritative = str(tds[0].get_property("title")).strip() == "Erledigt"
meeting_link = str(tds[3].find_element_by_xpath("a").get_property("href")).strip()
consultations.append(agenda.Consultation(
authoritative, meeting_link,
[current_organization], current_role))
text_divs = driver.find_elements(By.XPATH, "//table[@class='risdeco']//tr[2]//td[2]//div") text_divs = driver.find_elements(By.XPATH, "//table[@class='risdeco']//tr[2]//td[2]//div")
context_div = text_divs[0] context_div = text_divs[0]
@@ -176,8 +194,8 @@ def get_motion(driver: webdriver.Firefox, link: str, reference: str) -> agenda.M
petition.rstrip() petition.rstrip()
return agenda.Motion(name=name, reference=reference, return agenda.Motion(name=name, reference=reference,
type=motion_type, underDirectionOf=under_direction_of, type=motion_type, under_direction_of=under_direction_of,
context=context, petition=petition) context=context, petition=petition, consultations=consultations)
if __name__ == "__main__": if __name__ == "__main__":