import requests
from bs4 import BeautifulSoup
import os

# URL of the book whose chapter PDFs are to be downloaded
book_url = input("Book url: ")  # e.g. "https://www.degruyterbrill.com/document/doi/10.1515/9781400882540/html"
pre = book_url[:book_url.find("/document")]  # scheme and host part of the URL

html = requests.get(book_url).content.decode()
parsed_html = BeautifulSoup(html, "html.parser")

# The book title becomes the name of the output directory
title = parsed_html.find("h1", attrs={"class": "title-dgb"}).contents[0]
os.makedirs(f"./{title}", exist_ok=True)

# Each table-of-contents row links to one chapter page; collect those links
res = parsed_html.find_all("tr", attrs={"class": "bookTocEntryRow"})
pdf_urls = [tag.find("a")["href"] for tag in res]

for i, pdf_url in enumerate(pdf_urls):
    # Swap the chapter's "html" page path for its "pdf" counterpart and download it
    response = requests.get(pre + pdf_url.replace("html", "pdf"))
    with open(f"{title}/{i}_{pdf_url[-8:-5]}.pdf", "wb") as f:
        f.write(response.content)
    print(f"saved {i+1} out of {len(pdf_urls)}")