Covered cases where fewer documents are available and added ort.txt
This commit is contained in:
parent
45813e5186
commit
a29d96c760
|
@ -3,6 +3,9 @@ Changelog
|
||||||
|
|
||||||
This changelog contains a list of versions with their respective high-level changes.
|
This changelog contains a list of versions with their respective high-level changes.
|
||||||
|
|
||||||
|
v0.3.5:
|
||||||
|
- covered case where not all types of documents are available
|
||||||
|
- added ort.txt to the document output; it contains the location of the session
|
||||||
v0.3.4:
|
v0.3.4:
|
||||||
- fixed some typos
|
- fixed some typos
|
||||||
- fixed not working badges
|
- fixed not working badges
|
||||||
|
|
|
@ -6,7 +6,7 @@ ALLRIS Scraper
|
||||||
.. image:: https://img.shields.io/pypi/pyversions/twomartens.allrisscraper.svg
|
.. image:: https://img.shields.io/pypi/pyversions/twomartens.allrisscraper.svg
|
||||||
:alt: Python 3.7 and 3.8
|
:alt: Python 3.7 and 3.8
|
||||||
.. image:: https://img.shields.io/pypi/v/twomartens.allrisscraper.svg
|
.. image:: https://img.shields.io/pypi/v/twomartens.allrisscraper.svg
|
||||||
:alt: version 0.3.4
|
:alt: version 0.3.5
|
||||||
|
|
||||||
This scraper requires your username and password and performs the following tasks for you:
|
This scraper requires your username and password and performs the following tasks for you:
|
||||||
|
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -30,7 +30,7 @@ setup(
|
||||||
author="Jim Martens",
|
author="Jim Martens",
|
||||||
author_email="github@2martens.de",
|
author_email="github@2martens.de",
|
||||||
url="https://git.2martens.de/2martens/allris-scraper",
|
url="https://git.2martens.de/2martens/allris-scraper",
|
||||||
version="0.3.4",
|
version="0.3.5",
|
||||||
namespace_packages=["twomartens"],
|
namespace_packages=["twomartens"],
|
||||||
packages=find_packages('src', exclude=["*.tests", "*.tests.*", "tests.*", "tests"]),
|
packages=find_packages('src', exclude=["*.tests", "*.tests.*", "tests.*", "tests"]),
|
||||||
package_dir={'': 'src'},
|
package_dir={'': 'src'},
|
||||||
|
|
|
@ -118,12 +118,16 @@ def download_documents(driver: webdriver.Firefox, meetings: List[meeting.Meeting
|
||||||
td = driver.find_element(By.XPATH, "//table[@class='tk1']//td[@class='me1']")
|
td = driver.find_element(By.XPATH, "//table[@class='tk1']//td[@class='me1']")
|
||||||
form_elements = td.find_elements_by_tag_name("form")
|
form_elements = td.find_elements_by_tag_name("form")
|
||||||
agenda_link, total_link, invitation_link = get_links(form_elements, base_link)
|
agenda_link, total_link, invitation_link = get_links(form_elements, base_link)
|
||||||
|
if len(agenda_link) > 0:
|
||||||
driver.get(agenda_link)
|
driver.get(agenda_link)
|
||||||
save_pdf(driver.current_url, f"{get_formatted_filename(pdf_location, _meeting, district)}/Tagesordnung.pdf")
|
save_pdf(driver.current_url, f"{get_formatted_filename(pdf_location, _meeting, district)}/Tagesordnung.pdf")
|
||||||
|
if len(total_link) > 0:
|
||||||
driver.get(total_link)
|
driver.get(total_link)
|
||||||
save_pdf(driver.current_url, f"{get_formatted_filename(pdf_location, _meeting, district)}/Mappe.pdf")
|
save_pdf(driver.current_url, f"{get_formatted_filename(pdf_location, _meeting, district)}/Mappe.pdf")
|
||||||
|
if len(invitation_link) > 0:
|
||||||
driver.get(invitation_link)
|
driver.get(invitation_link)
|
||||||
save_pdf(driver.current_url, f"{get_formatted_filename(pdf_location, _meeting, district)}/Einladung.pdf")
|
save_pdf(driver.current_url, f"{get_formatted_filename(pdf_location, _meeting, district)}/Einladung.pdf")
|
||||||
|
save_file(_meeting.location, f"{get_formatted_filename(pdf_location, _meeting, district)}/ort.txt")
|
||||||
|
|
||||||
|
|
||||||
def get_links(form_elements: List[WebElement], base_link: str) -> Tuple[str, str, str]:
|
def get_links(form_elements: List[WebElement], base_link: str) -> Tuple[str, str, str]:
|
||||||
|
@ -146,6 +150,13 @@ def get_links(form_elements: List[WebElement], base_link: str) -> Tuple[str, str
|
||||||
if name == invitation_name:
|
if name == invitation_name:
|
||||||
links[invitation_name] = link
|
links[invitation_name] = link
|
||||||
|
|
||||||
|
if agenda_name not in links:
|
||||||
|
links[agenda_name] = ""
|
||||||
|
if invitation_name not in links:
|
||||||
|
links[invitation_name] = ""
|
||||||
|
if total_short_name not in links:
|
||||||
|
links[total_short_name] = ""
|
||||||
|
|
||||||
return links[agenda_name], links[total_short_name], links[invitation_name]
|
return links[agenda_name], links[total_short_name], links[invitation_name]
|
||||||
|
|
||||||
|
|
||||||
|
@ -161,6 +172,12 @@ def save_pdf(url: str, dest: str) -> None:
|
||||||
file.write(data_to_write)
|
file.write(data_to_write)
|
||||||
|
|
||||||
|
|
||||||
|
def save_file(content: str, dest: str) -> None:
    """Write ``content`` to a text file at ``dest``.

    Creates any missing parent directories of ``dest`` first.

    :param content: text to persist (e.g. the session location for ort.txt)
    :param dest: destination file path; its directory is created if absent
    :raises OSError: if the directory or file cannot be created/written
    """
    os.makedirs(os.path.dirname(dest), exist_ok=True)
    # Explicit UTF-8: the content is German text (umlauts etc.); the
    # platform-default encoding (e.g. cp1252 on Windows) is not reliable.
    with open(dest, "w", encoding="utf-8") as file:
        file.write(content)
|
||||||
|
|
||||||
|
|
||||||
def get_day(date_str: str) -> date:
    """Extract a :class:`datetime.date` from a German date string.

    Expects input of the form ``"Weekday, DD.MM.YYYY"`` (e.g.
    ``"Mittwoch, 04.03.2020"``): everything after the first comma is
    split on dots, and the last three fields are taken as day, month
    and year.

    :param date_str: date string with the day-of-week prefix
    :return: the parsed calendar date
    """
    *_, day, month, year = date_str[date_str.find(",") + 1:].split(".")
    return date(int(year), int(month), int(day))
|
||||||
|
|
Loading…
Reference in New Issue