diff --git a/main.py b/main.py index cf8836e..a0d0c59 100644 --- a/main.py +++ b/main.py @@ -36,15 +36,16 @@ return result_links -def extract_text_from_links(links): +def extract_text_from_links(links, timeout=5): extracted_texts = [] headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3' } for link in links: + print("downloading text from: " + link) try: - response = requests.get(link, headers=headers) + response = requests.get(link, headers=headers, timeout=timeout) if response.status_code == 200: soup = BeautifulSoup(response.content, 'html.parser') # Extract text from the page