Fixed wrong links after fourth link was added in ALLRIS

This commit is contained in:
2020-01-13 08:07:44 +01:00
parent 1036e77c5e
commit e5dd1479c7
3 changed files with 28 additions and 9 deletions

View File

@ -2,7 +2,7 @@
![Apache Licence 2.0](https://img.shields.io/pypi/l/twomartens.allrisscraper) ![Apache Licence 2.0](https://img.shields.io/pypi/l/twomartens.allrisscraper)
![Supports Python 3.7 and 3.8](https://img.shields.io/pypi/pyversions/twomartens.allrisscraper) ![Supports Python 3.7 and 3.8](https://img.shields.io/pypi/pyversions/twomartens.allrisscraper)
![version 0.3.1](https://img.shields.io/pypi/v/twomartens.allrisscraper) ![version 0.3.2](https://img.shields.io/pypi/v/twomartens.allrisscraper)
This scraper requires your username and password and performs the following tasks for you: This scraper requires your username and password and performs the following tasks for you:

View File

@ -30,7 +30,7 @@ setup(
author="Jim Martens", author="Jim Martens",
author_email="github@2martens.de", author_email="github@2martens.de",
url="https://git.2martens.de/2martens/allris-scraper", url="https://git.2martens.de/2martens/allris-scraper",
version="0.3.1", version="0.3.2",
namespace_packages=["twomartens"], namespace_packages=["twomartens"],
packages=find_packages('src', exclude=["*.tests", "*.tests.*", "tests.*", "tests"]), packages=find_packages('src', exclude=["*.tests", "*.tests.*", "tests.*", "tests"]),
package_dir={'': 'src'}, package_dir={'': 'src'},

View File

@ -20,11 +20,12 @@ from datetime import date
from datetime import time from datetime import time
import os import os
from typing import List from typing import List, Tuple
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from selenium.webdriver.firefox import webdriver from selenium.webdriver.firefox import webdriver
from selenium.webdriver.firefox.options import Options from selenium.webdriver.firefox.options import Options
from selenium.webdriver.remote.webelement import WebElement
from twomartens.allrisscraper import meeting from twomartens.allrisscraper import meeting
from twomartens.allrisscraper import definitions from twomartens.allrisscraper import definitions
@ -113,12 +114,7 @@ def download_documents(driver: webdriver.WebDriver, meetings: List[meeting.Meeti
driver.get(_meeting.link) driver.get(_meeting.link)
td = driver.find_element(By.XPATH, "//table[@class='tk1']//td[@class='me1']") td = driver.find_element(By.XPATH, "//table[@class='tk1']//td[@class='me1']")
form_elements = td.find_elements_by_tag_name("form") form_elements = td.find_elements_by_tag_name("form")
agenda_item = form_elements[0] agenda_link, total_link, invitation_link = get_links(form_elements, base_link)
agenda_link = f"{base_link}?DOLFDNR={agenda_item.find_element_by_name('DOLFDNR').get_property('value')}&options=64"
total_item = form_elements[1]
total_link = f"{base_link}?DOLFDNR={total_item.find_element_by_name('DOLFDNR').get_property('value')}&options=64"
invitation_item = form_elements[2]
invitation_link = f"{base_link}?DOLFDNR={invitation_item.find_element_by_name('DOLFDNR').get_property('value')}&options=64"
driver.get(agenda_link) driver.get(agenda_link)
save_pdf(driver.current_url, f"{get_formatted_filename(pdf_location, _meeting, district)}/Tagesordnung.pdf") save_pdf(driver.current_url, f"{get_formatted_filename(pdf_location, _meeting, district)}/Tagesordnung.pdf")
driver.get(total_link) driver.get(total_link)
@ -127,6 +123,29 @@ def download_documents(driver: webdriver.WebDriver, meetings: List[meeting.Meeti
save_pdf(driver.current_url, f"{get_formatted_filename(pdf_location, _meeting, district)}/Einladung.pdf") save_pdf(driver.current_url, f"{get_formatted_filename(pdf_location, _meeting, district)}/Einladung.pdf")
def get_links(form_elements: List[WebElement], base_link: str) -> Tuple[str, str, str]:
    """Build the agenda, document package and invitation links of a meeting.

    Every form element is identified by the ``value`` property of its child
    with class ``il2_p``. The download link is assembled from ``base_link``
    and the ``value`` property of the child named ``DOLFDNR``. Forms with an
    unrecognised label are ignored. When several forms map to the same
    document, the one appearing last in ``form_elements`` wins.

    Args:
        form_elements: form elements to inspect, in page order.
        base_link: URL to which the ``DOLFDNR`` query string is appended.

    Returns:
        A tuple ``(agenda_link, total_link, invitation_link)``.

    Raises:
        KeyError: if no form was found for one of the three documents.
    """
    agenda_key = "Tagesordnung"
    package_key = "Mappe"
    invitation_key = "Einladung"
    # Label shown on the form -> key under which its link is stored. Both the
    # regular and the updated agenda label end up in the same agenda slot.
    label_to_key = {
        agenda_key: agenda_key,
        "Aktuelle TO": agenda_key,
        "Alle Dokumente zur Sitzung im Paket": package_key,
        invitation_key: invitation_key,
    }

    collected = {}
    for form in form_elements:
        label = form.find_element_by_class_name("il2_p").get_property("value")
        document_id = form.find_element_by_name('DOLFDNR').get_property('value')
        key = label_to_key.get(label)
        if key is not None:
            collected[key] = f"{base_link}?DOLFDNR={document_id}&options=64"

    return collected[agenda_key], collected[package_key], collected[invitation_key]
def get_formatted_filename(pdf_location: str, meeting_obj: meeting.Meeting, district: str) -> str: def get_formatted_filename(pdf_location: str, meeting_obj: meeting.Meeting, district: str) -> str:
return f"{pdf_location}{meeting_obj.date.isoformat()}_{get_abbreviated_committee_name(meeting_obj.name, district)}" return f"{pdf_location}{meeting_obj.date.isoformat()}_{get_abbreviated_committee_name(meeting_obj.name, district)}"