156 lines
6.8 KiB
Python
156 lines
6.8 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright 2020 Jim Martens
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
from typing import Dict
|
|
from typing import List
|
|
from typing import Optional
|
|
|
|
from selenium import webdriver
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.remote.webelement import WebElement
|
|
|
|
from twomartens.allrisscraper import meeting
|
|
from twomartens.allrisscraper import data_types as types
|
|
|
|
# Relative XPath selecting the second <td> child of the element it is evaluated on.
XPATH_2ND_TD = "td[2]"
|
|
|
|
|
|
def process_agendas(driver: webdriver.Firefox, meetings: List[meeting.Meeting]) -> None:
    """
    Runs process_agenda once for every given meeting, in list order.

    :param driver: web driver that is handed on unchanged to process_agenda
    :param meetings: meetings to process; each one is passed to process_agenda
    """
    for current_meeting in meetings:
        process_agenda(driver, current_meeting)
|
|
|
|
|
|
def process_agenda(driver: webdriver.Firefox, meeting_obj: meeting.Meeting) -> None:
    """
    Loads the page of a meeting and stores its address and agenda on the meeting.

    The page behind ``meeting_obj.link`` is opened in the driver. Of the tables directly
    inside the content cell, the first one supplies the address and the second one the
    agenda rows. Every agenda row is converted with process_agenda_item.

    :param driver: web driver used to load and query the page
    :param meeting_obj: meeting to fill; its ``address`` and ``agenda`` attributes are overwritten
    :raises IndexError: if the content cell holds fewer than two tables or the
        meta table yields fewer than six rows
    """
    driver.get(meeting_obj.link)
    # The locator API (find_element(By...)) is used throughout because the
    # find_element_by_* helpers no longer exist in current Selenium releases.
    content_td = driver.find_element(By.XPATH, "//table[@class='risdeco']//tr[2]//td[2]")
    tables = content_td.find_elements(By.XPATH, "table")
    meta_table = tables[0]
    agenda_table = tables[1]
    meta_trs = meta_table.find_elements(By.XPATH, "./tbody//tr//td[1]//tr")
    # The address is read from the second cell of the sixth meta row (not stripped).
    meeting_obj.address = str(meta_trs[5].find_element(By.XPATH, XPATH_2ND_TD).text)

    # Keep only rows without header cells and without a cell whose colspan contains '7'.
    agenda_item_trs = agenda_table.find_elements(
        By.XPATH,
        ".//tr[not(descendant::th) and not(descendant::td[contains(@colspan, '7')])]")
    # The last matching row is always discarded.
    # NOTE(review): presumably a trailing non-item row - confirm against the page layout.
    agenda_item_trs = agenda_item_trs[:-1]

    agenda_items = [process_agenda_item(index, agenda_item_tr)
                    for index, agenda_item_tr in enumerate(agenda_item_trs)]
    meeting_obj.agenda = types.Agenda(agenda_items)
|
|
|
|
|
|
def process_agenda_item(index: int, item: WebElement) -> types.AgendaItem:
    """
    Converts one agenda table row into an agenda item.

    The first cell provides the link and number of the item, the fourth cell its name and
    the sixth cell an optional motion. An item counts as public if its number contains "Ö".

    :param index: position of the row within the agenda; stored as ``order``
    :param item: table row element of the agenda item
    :return: agenda item with an empty resolution text; ``motion_link`` and
        ``motion_reference`` are None if the motion cell contains no text
    """
    tds = item.find_elements(By.XPATH, "td")
    # Look the anchor up once and reuse it for both the link and the number.
    number_anchor = tds[0].find_element(By.TAG_NAME, "a")
    item_link = str(number_anchor.get_property("href")).strip()
    number = str(number_anchor.text).strip()
    name = str(tds[3].text).strip()
    public = "Ö" in number

    motion_link = None
    motion_reference = None
    # A motion is only present if the motion cell contains any text.
    if str(tds[5].text).strip():
        motion_anchor = tds[5].find_element(By.TAG_NAME, "a")
        motion_link = str(motion_anchor.get_property("href")).strip()
        motion_reference = str(motion_anchor.text).strip()

    return types.AgendaItem(number=number, order=index, name=name,
                            public=public, link=item_link,
                            motion_link=motion_link, motion_reference=motion_reference,
                            resolution_text="")
|
|
|
|
|
|
def get_motions(driver: webdriver.Firefox, meetings: List[meeting.Meeting]) -> Dict[str, types.Motion]:
    """
    Collects the motions referenced by the agenda items of the given meetings.

    Agenda items without a motion link are skipped. The result is keyed by motion
    reference, so a later agenda item with the same reference replaces the earlier entry.

    :param driver: web driver handed on to get_motion
    :param meetings: meetings whose ``agenda.agenda_items`` are inspected
    :return: mapping from motion reference to the motion returned by get_motion
    """
    collected: Dict[str, types.Motion] = {}
    for current_meeting in meetings:
        for item in current_meeting.agenda.agenda_items:
            if item.motion_link is not None:
                collected[item.motion_reference] = get_motion(
                    driver=driver,
                    agenda_item_link=item.link,
                    link=item.motion_link,
                    reference=item.motion_reference,
                )
    return collected
|
|
|
|
|
|
def get_motion(driver: webdriver.Firefox, agenda_item_link: str, link: str, reference: str) -> types.Motion:
    """
    Loads a motion page and extracts the motion together with its consultations.

    :param driver: web driver used to load and query the page
    :param agenda_item_link: link of the agenda item the motion belongs to; stored
        as ``agenda_item`` on every consultation
    :param link: URL of the motion page
    :param reference: reference of the motion; passed through to the result
    :return: motion built from the meta table, the file form and the text blocks of the page
    :raises IndexError: if the page yields fewer meta rows or text blocks than are read here
    """
    driver.get(link)
    # The locator API (find_element(By...)) is used throughout because the
    # find_element_by_* helpers no longer exist in current Selenium releases.
    meta_table = driver.find_element(
        By.XPATH, "//table[@class='risdeco']//tr[2]//td[2]//table//tr//td[1]//table")
    meta_trs = meta_table.find_elements(By.XPATH, "./tbody//tr")
    name = str(meta_trs[0].find_element(By.XPATH, XPATH_2ND_TD).text).strip()
    motion_type = str(meta_trs[1].find_element(By.XPATH, "td[4]").text).strip()
    under_direction_of = str(meta_trs[2].find_element(By.XPATH, XPATH_2ND_TD).text).strip()
    # The first row of the nested consultation table is skipped.
    consultation_trs = meta_trs[4].find_elements(By.XPATH, ".//table//tr")[1:]
    consultations = _parse_consultations(consultation_trs, agenda_item_link)

    file_table = driver.find_element(
        By.XPATH, "//table[@class='risdeco']//tr[2]//td[2]//table//tr//td[3]//table")
    motion_file_form = file_table.find_element(By.XPATH, ".//tr[2]//td//form[1]")
    file_link = _build_file_link(motion_file_form)

    text_divs = driver.find_elements(By.XPATH, "//table[@class='risdeco']//tr[2]//td[2]//div")
    # In both text blocks the first and the last paragraph are left out.
    context = _join_paragraph_texts(text_divs[0].find_elements(By.XPATH, "p")[1:-1])
    petition = _join_paragraph_texts(text_divs[1].find_elements(By.XPATH, "p")[1:-1])
    # str.rstrip returns a new string; the result has to be assigned to take effect.
    petition = petition.rstrip()

    return types.Motion(name=name, reference=reference,
                        type=motion_type, under_direction_of=under_direction_of,
                        context=context, petition=petition, consultations=consultations,
                        file=file_link)


def _parse_consultations(consultation_trs: List[WebElement], agenda_item_link: str) -> List[types.Consultation]:
    """Builds the consultations from the rows of the consultation table of a motion page."""
    current_organization: Optional[str] = None
    current_role: Optional[str] = None
    consultations = []
    for consultation_tr in consultation_trs:
        tds = consultation_tr.find_elements(By.XPATH, "td")
        if tds[1].get_attribute("class") == "text1":
            # Header row: remember organization and role for the rows that follow.
            current_organization = str(tds[1].text).strip()
            current_role = str(tds[2].text).strip() if len(tds) >= 3 else None
            continue

        authoritative = str(tds[0].get_property("title")).strip() == "Erledigt" \
            and str(tds[4].text).strip() in ("beschlossen", "zur Kenntnis genommen", "abgelehnt")
        # Rows without a link to a meeting are ignored.
        meeting_anchors = tds[3].find_elements(By.XPATH, "a")
        if not meeting_anchors:
            continue
        meeting_link = str(meeting_anchors[0].get_property("href")).strip()
        consultations.append(types.Consultation(
            authoritative=authoritative, meeting=meeting_link,
            organization=[current_organization], role=current_role,
            agenda_item=agenda_item_link, result=str(tds[2].text).strip()
        ))
    return consultations


def _build_file_link(motion_file_form: WebElement) -> str:
    """Returns the action URL of the form with its hidden inputs appended as query string."""
    hidden_inputs = motion_file_form.find_elements(By.XPATH, ".//input[contains(@type, 'hidden')]")
    # NOTE(review): names and values are inserted as-is, without URL encoding.
    query = "&".join(
        f"{hidden_input.get_property('name')}={hidden_input.get_property('value')}"
        for hidden_input in hidden_inputs
    )
    if hidden_inputs:
        query = f"?{query}"
    return f"{motion_file_form.get_property('action')}{query}"


def _join_paragraph_texts(paragraphs: List[WebElement]) -> str:
    """Joins the stripped texts of the paragraphs with newlines, without leading newlines."""
    joined = ""
    for paragraph in paragraphs:
        # A separator is only added once there is some text; leading empty paragraphs vanish.
        if joined:
            joined += "\n"
        joined += str(paragraph.text).strip()
    return joined
|