From e124e93b988b9ccbf1b17a86dc99490b6ef0b19b Mon Sep 17 00:00:00 2001 From: E-Berry Date: Thu, 11 Jan 2024 15:15:01 +0100 Subject: [PATCH 1/2] =?UTF-8?q?=F0=9F=94=A7=20fix(lyrics):=20update=20Goog?= =?UTF-8?q?le=20Custom=20Search=20API=20endpoint=20The=20site=5Frestricted?= =?UTF-8?q?=5Fapi=20will=20soon=20be=20dead=20and=20can=20currently=20only?= =?UTF-8?q?=20be=20used=20by=20old=20customers.=20Tested=20a=20typical=20s?= =?UTF-8?q?earch=20engine=20on=20my=20end,=20restricted=20to=20all=20the?= =?UTF-8?q?=20websites=20made=20to=20be=20scraped=20and=20all=20the=20thre?= =?UTF-8?q?e=20tests=20passed.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lyrics_extractor/lyrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lyrics_extractor/lyrics.py b/lyrics_extractor/lyrics.py index abefd03..2e01eb4 100644 --- a/lyrics_extractor/lyrics.py +++ b/lyrics_extractor/lyrics.py @@ -137,7 +137,7 @@ def __init__(self, gcs_api_key: str, gcs_engine_id: str): self.GCS_ENGINE_ID = gcs_engine_id def __handle_search_request(self, song_name): - url = "https://www.googleapis.com/customsearch/v1/siterestrict" + url = "https://www.googleapis.com/customsearch/v1" params = { 'key': self.GCS_API_KEY, 'cx': self.GCS_ENGINE_ID, From c425e2a41339df07dceef7942d768eac943d5420 Mon Sep 17 00:00:00 2001 From: E-Berry Date: Thu, 11 Jan 2024 19:39:51 +0100 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=94=A7=20feat(lyrics):=20add=20user-a?= =?UTF-8?q?gent=20headers=20to=20requests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lyrics_extractor/lyrics.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lyrics_extractor/lyrics.py b/lyrics_extractor/lyrics.py index 2e01eb4..9541b0e 100644 --- a/lyrics_extractor/lyrics.py +++ b/lyrics_extractor/lyrics.py @@ -129,6 +129,10 @@ class SongLyrics: 'lyricsmint': scraper_factory.lyricsmint_scraper, } + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" + } + def __init__(self, gcs_api_key: str, gcs_engine_id: str): if type(gcs_api_key) != str or type(gcs_engine_id) != str: raise TypeError("API key and engine ID must be a string.") @@ -144,7 +148,7 @@ def __handle_search_request(self, song_name): 'q': '{} lyrics'.format(song_name), } - response = requests.get(url, params=params) + response = requests.get(url, params=params, headers=self.headers) data = response.json() if response.status_code != 200: raise LyricScraperException(data) @@ -152,7 +156,7 @@ def __handle_search_request(self, song_name): def __extract_lyrics(self, result_url, title): # Get the page source code - page = requests.get(result_url) + page = requests.get(result_url, headers=self.headers) source_code = BeautifulSoup(page.content, 'lxml') self.scraper_factory(source_code, title)