From 3f18c667316811736b2f93d598700a0c41d1a947 Mon Sep 17 00:00:00 2001 From: Yash Date: Sat, 25 Nov 2017 02:12:36 +0530 Subject: [PATCH 01/10] Added a function get_reset to reset the parameters --- gnewsclient/gnewsclient.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/gnewsclient/gnewsclient.py b/gnewsclient/gnewsclient.py index 806c801..8a1fce7 100644 --- a/gnewsclient/gnewsclient.py +++ b/gnewsclient/gnewsclient.py @@ -45,6 +45,16 @@ def get_config(self): 'query': self.query } return config + + def get_reset(self): + ''' + function to reset the parameters + ''' + self.edition = 'United States (English)' + self.language = 'english' + self.location = None + self.query = None + self.topic = 'top stories' def get_news(self): From 079a253e4fc56581850d3e433bbd2afaf2734f13 Mon Sep 17 00:00:00 2001 From: Yash Sharma <31438680+yashrsharma44@users.noreply.github.com> Date: Sat, 2 Dec 2017 12:38:27 +0530 Subject: [PATCH 02/10] Changed function name from get_reset to reset --- gnewsclient/gnewsclient.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gnewsclient/gnewsclient.py b/gnewsclient/gnewsclient.py index 8a1fce7..d39d2d4 100644 --- a/gnewsclient/gnewsclient.py +++ b/gnewsclient/gnewsclient.py @@ -46,7 +46,7 @@ def get_config(self): } return config - def get_reset(self): + def reset(self): ''' function to reset the parameters ''' @@ -140,4 +140,4 @@ def scrape_feed(self, soup): article['img'] = None pass articles.append(article) - return articles \ No newline at end of file + return articles From 957b6057cb7f3f7f3eaad8c33a30f4a8ff3e09b9 Mon Sep 17 00:00:00 2001 From: CodeVINCI Date: Sat, 2 Dec 2017 14:56:37 +0530 Subject: [PATCH 03/10] added requirements.txt --- requirements.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..9ae579f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +bs4==0.0.1 +html5lib==0.999999999 +requests==2.18.4 From 40cd169b51380907f2c7a5419157a7b9f29c87a5 Mon Sep 17 00:00:00 2001 From: Hemnath-D Date: Wed, 6 Dec 2017 20:10:04 +0530 Subject: [PATCH 04/10] empty articles list exception handled --- gnewsclient/gnewsclient.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/gnewsclient/gnewsclient.py b/gnewsclient/gnewsclient.py index d39d2d4..64592cd 100644 --- a/gnewsclient/gnewsclient.py +++ b/gnewsclient/gnewsclient.py @@ -2,7 +2,7 @@ import requests from bs4 import BeautifulSoup from .utils import editionMap, topicMap, langMap - +from .userexception import NotFound class gnewsclient: @@ -105,9 +105,9 @@ def set_params(self): # setting location if self.location != None: - self.params['geo'] = self.location - # topic overrides location parameter. So, overriding it. - self.params['topic'] = None + self.params['geo'] = self.location + # topic overrides location parameter. So, overriding it. + self.params['topic'] = None # params setting successful return True @@ -140,4 +140,10 @@ def scrape_feed(self, soup): article['img'] = None pass articles.append(article) + try: + if len(articles)==0: + raise NotFound + except NotFound: + print("The articles for the given response are not found") + return return articles From 79d1168f03e2ab54bdaa2d199c7be5f518bce2b9 Mon Sep 17 00:00:00 2001 From: Hemnath-D Date: Wed, 6 Dec 2017 20:12:41 +0530 Subject: [PATCH 05/10] added userexception.py userexception.py includes user defined exception NotFound --- gnewsclient/userexception.py | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 gnewsclient/userexception.py diff --git a/gnewsclient/userexception.py b/gnewsclient/userexception.py new file mode 100644 index 0000000..077091a --- /dev/null +++ b/gnewsclient/userexception.py @@ -0,0 +1,4 @@ +#User defined exceptions for Gnewsclient +class NotFound(Exception): + """Raised when the list articles in the function scapefeed() is empty""" + pass From 444b7b27c380f7adeb27341841387754f5ae3ec9 Mon Sep 17 00:00:00 2001 From: Subhrajit Prusty Date: Thu, 7 Dec 2017 03:34:07 +0530 Subject: [PATCH 06/10] added cli for gnewsclient, updated README with instructions --- .gitignore | 1 + CLI.md | 46 +++++++++++++++++++++++++++ README.md | 2 ++ gnewsclient/gnewsclient.py | 2 +- gnewsclient/scripts/__init__.py | 0 gnewsclient/scripts/gnews.py | 56 +++++++++++++++++++++++++++++++++ setup.py | 9 ++++-- 7 files changed, 113 insertions(+), 3 deletions(-) create mode 100644 CLI.md create mode 100644 gnewsclient/scripts/__init__.py create mode 100644 gnewsclient/scripts/gnews.py diff --git a/.gitignore b/.gitignore index 4a56e6e..77f6d90 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ /dist/ /*.egg /*.egg-info +venv diff --git a/CLI.md b/CLI.md new file mode 100644 index 0000000..ae28c4a --- /dev/null +++ b/CLI.md @@ -0,0 +1,46 @@ +# gnews + +CLI to use gnewsclient + +How to use? +----------- + +To use, it is recommended to make `virtualenv` and then install all required packages: + +* Installing virtualenv: +``` +$ sudo pip install virtualenv +``` +* Making virtualenv: +``` +$ virtualenv venv +``` +* Go to your gnewsclient dir and activate it: +``` +$ . venv/bin/activate +``` +* To install all required packages: + ``` + $ pip install --editable . + or + $ sudo pip install --editable . +``` + + +## Usage: `$ gnews [OPTIONS]` + +``` +Options: + --config shows default config + --query TEXT shows news about query given + --edition TEXT shows news of edition given, default=United States + (English) + --topic TEXT shows topic given, default=top stories + --location TEXT shows news from location given + --language TEXT shows news in language given, default is english + --sheditions shows list of available editions + --shtopics shows list of available topics + --shlangs shows list of available languages + --help Show this message and exit. + + ``` \ No newline at end of file diff --git a/README.md b/README.md index 3669c06..ea179fe 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,8 @@ To install gnewsclient, simply, $ pip install gnewsclient ``` +To install and use **gnewsclient CLI**, follow instructions here [CLI](CLI.md) + ## Filters Google News feeds use 3 basic filters: diff --git a/gnewsclient/gnewsclient.py b/gnewsclient/gnewsclient.py index 806c801..1cc423b 100644 --- a/gnewsclient/gnewsclient.py +++ b/gnewsclient/gnewsclient.py @@ -41,7 +41,7 @@ def get_config(self): 'edition': self.edition, 'topic': self.topic, 'language': self.language, - 'loaction': self.location, + 'location': self.location, 'query': self.query } return config diff --git a/gnewsclient/scripts/__init__.py b/gnewsclient/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/gnewsclient/scripts/gnews.py b/gnewsclient/scripts/gnews.py new file mode 100644 index 0000000..e0d24a1 --- /dev/null +++ b/gnewsclient/scripts/gnews.py @@ -0,0 +1,56 @@ +import click +from gnewsclient import gnewsclient + +client = gnewsclient() + +@click.command() +@click.option("--config",is_flag=True,help="shows default config") + +@click.option("--query",default=None,help="shows news about query given") +@click.option("--edition",default="United States (English)",help="shows news of edition given, default=United States (English)") +@click.option("--topic",default="top stories",help="shows topic given, default=top stories") +@click.option("--location",default=None,help="shows news from location given") +@click.option("--language",default="english",help="shows news in language given, default is english") + +@click.option("--sheditions",is_flag=True,help="shows list of available editions") +@click.option("--shtopics",is_flag=True,help="shows list of available topics") +@click.option("--shlangs",is_flag=True,help="shows list of available languages") + +def cli(config,query,edition,topic,location,language,shlangs,shtopics,sheditions): + """ CLI to get news """ + + client.query = query + client.edition = edition + client.topic = topic + client.location = location + client.language = language + + if config: + conf = client.get_config() + click.echo("The default configuration : ") + for keys,value in conf.items(): + click.echo(str(keys)+" : "+str(value)) + + elif shlangs: + langs = client.languages + click.echo("The languages supported : ") + for l in langs: + click.echo(l) + + elif sheditions: + editions = client.editions + click.echo("The editions available : ") + for e in editions: + click.echo(e) + + elif shtopics: + tps = client.topics + click.echo("The topics available : ") + for t in tps: + click.echo(t) + else: + neews = client.get_news() + for n in neews: + content = "{}\n{}".format(n['title'],n['link']) + click.echo(content) + click.echo("\n") \ No newline at end of file diff --git a/setup.py b/setup.py index 6b088e8..82f6dd4 100644 --- a/setup.py +++ b/setup.py @@ -31,6 +31,11 @@ def readme(): author_email = 'nikhilksingh97@gmail.com', license = 'MIT', packages = ['gnewsclient'], - install_requires = ['requests', 'bs4', 'html5lib'], + install_requires = ['requests', 'bs4', 'html5lib', 'Click'], include_package_data = True, - zip_safe = False) \ No newline at end of file + zip_safe = False, + entry_points=''' + [console_scripts] + gnews=gnewsclient.scripts.gnews:cli + ''', + ) \ No newline at end of file From a3df95565dd9527abe8b6822f31efae4c81d2e21 Mon Sep 17 00:00:00 2001 From: nikhilkumarsingh Date: Fri, 8 Dec 2017 17:50:40 +0530 Subject: [PATCH 07/10] minor fix --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 82f6dd4..3cb0880 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ def readme(): author = 'Nikhil Kumar Singh', author_email = 'nikhilksingh97@gmail.com', license = 'MIT', - packages = ['gnewsclient'], + packages = ['gnewsclient', 'gnewsclient.scripts'], install_requires = ['requests', 'bs4', 'html5lib', 'Click'], include_package_data = True, zip_safe = False, From 415fccb1632b92aec9f5fead0b0f04ed04fb8444 Mon Sep 17 00:00:00 2001 From: nikhilkumarsingh Date: Fri, 8 Dec 2017 17:51:06 +0530 Subject: [PATCH 08/10] minor formatting --- gnewsclient/gnewsclient.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gnewsclient/gnewsclient.py b/gnewsclient/gnewsclient.py index 0d26748..da0c539 100644 --- a/gnewsclient/gnewsclient.py +++ b/gnewsclient/gnewsclient.py @@ -105,9 +105,9 @@ def set_params(self): # setting location if self.location != None: - self.params['geo'] = self.location - # topic overrides location parameter. So, overriding it. - self.params['topic'] = None + self.params['geo'] = self.location + # topic overrides location parameter. So, overriding it. + self.params['topic'] = None # params setting successful return True @@ -144,6 +144,6 @@ def scrape_feed(self, soup): if len(articles)==0: raise NotFound except NotFound: - print("The articles for the given response are not found") - return + print("The articles for the given response are not found.") + return return articles From 93b3d136adaebc2b453a65ec1a054f87007931aa Mon Sep 17 00:00:00 2001 From: nikhilkumarsingh Date: Fri, 8 Dec 2017 17:54:02 +0530 Subject: [PATCH 09/10] new version 1.1.0 --- README.md | 2 +- README.rst | 5 ++++- setup.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ea179fe..42efcef 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![PyPI](https://img.shields.io/badge/PyPi-v1.0.2-f39f37.svg)](https://pypi.python.org/pypi/gnewsclient) +[![PyPI](https://img.shields.io/badge/PyPi-v1.1.0-f39f37.svg)](https://pypi.python.org/pypi/gnewsclient) [![license](https://img.shields.io/github/license/mashape/apistatus.svg?maxAge=2592000)](https://github.com/nikhilkumarsingh/gnewsclient/blob/master/LICENSE.txt) # gnewsclient diff --git a/README.rst b/README.rst index 451bf8f..f910ec8 100644 --- a/README.rst +++ b/README.rst @@ -17,6 +17,9 @@ To install gnewsclient, simply, $ pip install gnewsclient +To install and use **gnewsclient CLI**, follow instructions here +`CLI `__ + Filters ------- @@ -152,7 +155,7 @@ Usage 'estonian', 'indonesian', 'slovenian', 'italian', 'maltese', 'haitian creole', 'esperanto', 'ukrainian', 'afrikaans', 'filipino', 'gujarati', 'hebrew', 'telugu', 'greek', 'persian', 'romanian'] -.. |PyPI| image:: https://img.shields.io/badge/PyPi-v1.0.2-f39f37.svg +.. |PyPI| image:: https://img.shields.io/badge/PyPi-v1.1.0-f39f37.svg :target: https://pypi.python.org/pypi/gnewsclient .. |license| image:: https://img.shields.io/github/license/mashape/apistatus.svg?maxAge=2592000 :target: https://github.com/nikhilkumarsingh/gnewsclient/blob/master/LICENSE.txt diff --git a/setup.py b/setup.py index 3cb0880..6c856b8 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ def readme(): pass setup(name = 'gnewsclient', - version = '1.0.2', + version = '1.1.0', classifiers = [ 'Development Status :: 4 - Beta', 'License :: OSI Approved :: MIT License', From b95c1db8ce8c39ee07c107a48436ef58d3001a68 Mon Sep 17 00:00:00 2001 From: CodeVINCI Date: Tue, 12 Dec 2017 01:29:48 +0530 Subject: [PATCH 10/10] complete article text and metadata extraction --- gnewsclient/gnewsclient.py | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/gnewsclient/gnewsclient.py b/gnewsclient/gnewsclient.py index da0c539..73e453c 100644 --- a/gnewsclient/gnewsclient.py +++ b/gnewsclient/gnewsclient.py @@ -2,6 +2,7 @@ import requests from bs4 import BeautifulSoup from .utils import editionMap, topicMap, langMap +from newspaper import Article from .userexception import NotFound class gnewsclient: @@ -68,8 +69,14 @@ def get_news(self): soup = self.load_feed() articles = self.scrape_feed(soup) - return articles - + object_list = [] + for a in articles: + article = Articledata(a['link'], title=a['title']) + object_list.append(article) + return object_list + + + def set_params(self): ''' @@ -134,11 +141,6 @@ def scrape_feed(self, soup): article = {} article['title'] = entry.title.text article['link'] = entry.link['href'].split('&url=')[1] - try: - article['img'] = "https:" + entry.content.text.split('src=\"')[1].split('\"')[0] - except: - article['img'] = None - pass articles.append(article) try: if len(articles)==0: @@ -147,3 +149,22 @@ def scrape_feed(self, soup): print("The articles for the given response are not found.") return return articles + + +class Articledata(Article): + + def get_fulltext(self): + if self.html=='': + self.build() + return self.text + + def get_metadata(self): + if self.html=='': + self.build() + return self.meta_data + def get_summary(self): + if self.html=='': + self.build() + return self.summary + +