diff --git a/interval_summarizer.py b/interval_summarizer.py index e2ab670..1079787 100644 --- a/interval_summarizer.py +++ b/interval_summarizer.py @@ -56,6 +56,9 @@ def report_summary(self, messages): """The interval summaries are joined.""" return '\n'.join(self.summarize(messages)) + def set_interval(self, ispecs): + self.intervals = map(lambda ispec: IntervalSpec(**ispec), ispecs) + def summarize_segment(self, msg_seg): """Call the summarizer that is used.""" return msg_seg diff --git a/main.py b/main.py index 8c22076..a7f6bfa 100644 --- a/main.py +++ b/main.py @@ -30,7 +30,7 @@ def slackReq(): 'params' : maybe_get(req_data, 'text', default=''), 'summ' : lsa_summ } - if "gensim" in req['params'].split(): + if "gensim" in SUMMS and "gensim" in req['params'].split(): req['summ'] = None return (SlackRouter().get_summary(**req)) @@ -51,7 +51,7 @@ def slackTestReq(): 'summ' : lsa_summ, 'test' : True } - if "gensim" in req['params'].split(): + if "gensim" in SUMMS and "gensim" in req['params'].split(): req['summ'] = None return (SlackRouter(test=True).get_summary(**req)) diff --git a/test_spacy_with_hypothesis.py b/test_spacy_with_hypothesis.py index bccd1fa..8857305 100644 --- a/test_spacy_with_hypothesis.py +++ b/test_spacy_with_hypothesis.py @@ -2,6 +2,7 @@ import json import io from sp_summarizer import (SpacyTsSummarizer) +import hypothesis.settings as hs from interval_summarizer import (IntervalSpec, TsSummarizer, tagged_sum, ts_to_time) import lsa @@ -28,27 +29,23 @@ def read_dir(fdir): test_json_msgs_c3 = [read_dir(fdir) for fdir in ['api-test', 'calypso', 'games', 'happiness', 'hg', 'jetpack', 'jetpackfuel', 'livechat', 'tickets', 'vip']] -# for dirs in ['api-test', 'calypso', 'games', 'happiness', 'hg', 'jetpack', 'jetpackfuel', 'livechat', 'tickets', 'vip']: -# for jfile in glob.glob('./data/slack-logs-2/{}/*.json'.format(dirs)): -# test_json_msgs_c3 += json.load(io.open(jfile, encoding='utf-8')) - -print len(test_json_msgs_c3) - class TestSummarize(unittest.TestCase): test_msgs = test_json_msgs + summ = SpacyTsSummarizer([]) + summ.set_summarizer(lsa.LsaSummarizer()) + @given( lists(elements=sampled_from(test_json_msgs), min_size=3), - integers(min_value=1, max_value=20) + integers(min_value=1, max_value=20), settings=hs.Settings(timeout=1000) ) def test_text_rank_summarization_ds1_days(self, smp_msgs, days): """Generate something for N day interval""" logger.info("Input is %s", smp_msgs) asd = [{'days': days, 'size' : 3, 'txt' : u'Summary for first {} days:\n'.format(days)}] - summ = SpacyTsSummarizer(asd) - summ.set_summarizer(lsa.LsaSummarizer()) - sumry = summ.summarize(smp_msgs) + TestSummarize.summ.set_interval(asd) + sumry = TestSummarize.summ.summarize(smp_msgs) logger.debug("Summary is %s", sumry) # Length of summary is at least 1 and no greater than 3 self.assertTrue(len(sumry) >= 1) @@ -60,15 +57,14 @@ def test_text_rank_summarization_ds1_days(self, smp_msgs, days): @given( lists(elements=sampled_from(test_json_msgs_c2), min_size=12), - integers(min_value=1, max_value=20) + integers(min_value=1, max_value=20), settings=hs.Settings(timeout=1000) ) def test_text_rank_summarization_ds2_days(self, smp_msgs, days): """Generate something for N day interval""" logger.info("Input is %s", smp_msgs) asd = [{'days': days, 'size' : 3, 'txt' : u'Summary for first {} days:\n'.format(days)}] - summ = SpacyTsSummarizer(asd) - summ.set_summarizer(lsa.LsaSummarizer()) - sumry = summ.summarize(smp_msgs) + TestSummarize.summ.set_interval(asd) + sumry = TestSummarize.summ.summarize(smp_msgs) logger.debug("Summary is %s", sumry) # Length of summary is at least 1 and no greater than 3 self.assertTrue(len(sumry) >= 1) @@ -80,7 +76,7 @@ def test_text_rank_summarization_ds2_days(self, smp_msgs, days): @given( integers(min_value=1, max_value=1000), - integers(min_value=1, max_value=20) + integers(min_value=1, max_value=20), settings=hs.Settings(timeout=1000) ) def test_text_rank_summarization_ds3_days(self, sampsize, days): """Generate something for N day interval""" @@ -88,10 +84,8 @@ def test_text_rank_summarization_ds3_days(self, sampsize, days): samp = random.choice(test_json_msgs_c3)[random.randint(1,len(ssamp)-2):] logger.info("Input is segment is %s", samp) asd = [{'days': days, 'size' : 3, 'txt' : u'Summary for first {} days:\n'.format(days)}] - summ = SpacyTsSummarizer(asd) - summ.set_summarizer(lsa.LsaSummarizer()) - - sumry = summ.summarize(samp) + TestSummarize.summ.set_interval(asd) + sumry = TestSummarize.summ.summarize(samp) logger.debug("Summary is %s", sumry) # Length of summary is at least 1 and no greater than 3 self.assertTrue(len(sumry) >= 1) @@ -102,16 +96,14 @@ def test_text_rank_summarization_ds3_days(self, sampsize, days): @given(lists(elements=sampled_from(test_json_msgs), min_size=1), - integers(min_value=1, max_value=24) + integers(min_value=1, max_value=24), settings=hs.Settings(timeout=1000) ) def test_text_rank_summarization_ds1_hours(self, smp_msgs, hours): """Generate something for N hour intervals""" logger.info("Input is %s", smp_msgs) asd = [{'hours': hours, 'size' : 3, 'txt' : u'Summary for first {} hours:\n'.format(hours)}] - summ = SpacyTsSummarizer(asd) - summ.set_summarizer(lsa.LsaSummarizer()) - - sumry = summ.summarize(smp_msgs) + TestSummarize.summ.set_interval(asd) + sumry = TestSummarize.summ.summarize(smp_msgs) logger.debug("Summary is %s", sumry) # Length of summary is at least 1 and no greater than 3 self.assertTrue(len(sumry) >= 1) @@ -122,16 +114,14 @@ def test_text_rank_summarization_ds1_hours(self, smp_msgs, hours): @given(lists(elements=sampled_from(test_json_msgs_c2), min_size=1), - integers(min_value=1, max_value=24) + integers(min_value=1, max_value=24), settings=hs.Settings(timeout=1000) ) def test_text_rank_summarization_ds2_hours(self, smp_msgs, hours): """Generate something for N hour intervals""" logger.info("Input is %s", smp_msgs) asd = [{'hours': hours, 'size' : 3, 'txt' : u'Summary for first {} hours:\n'.format(hours)}] - summ = SpacyTsSummarizer(asd) - summ.set_summarizer(lsa.LsaSummarizer()) - - sumry = summ.summarize(smp_msgs) + TestSummarize.summ.set_interval(asd) + sumry = TestSummarize.summ.summarize(smp_msgs) logger.debug("Summary is %s", sumry) # Length of summary is at least 1 and no greater than 3 self.assertTrue(len(sumry) >= 1) @@ -143,7 +133,7 @@ def test_text_rank_summarization_ds2_hours(self, smp_msgs, hours): @given( integers(min_value=2, max_value=1000), - integers(min_value=1, max_value=24) + integers(min_value=1, max_value=24), settings=hs.Settings(timeout=1000) ) def test_text_rank_summarization_ds3_hours(self, sampsize, hours): """Generate something for N hour intervals""" @@ -151,10 +141,8 @@ def test_text_rank_summarization_ds3_hours(self, sampsize, hours): samp = random.choice(test_json_msgs_c3)[random.randint(1,len(ssamp)-2):] logger.info("Input is segment is %s", samp) asd = [{'hours': hours, 'size' : 3, 'txt' : u'Summary for first {} hours:\n'.format(hours)}] - summ = SpacyTsSummarizer(asd) - summ.set_summarizer(lsa.LsaSummarizer()) - - sumry = summ.summarize(samp) + TestSummarize.summ.set_interval(asd) + sumry = TestSummarize.summ.summarize(samp) logger.debug("Summary is %s", sumry) # Length of summary is at least 1 and no greater than 3 self.assertTrue(len(sumry) >= 1) diff --git a/ts_config.py b/ts_config.py index d41c5d9..a5a4b24 100644 --- a/ts_config.py +++ b/ts_config.py @@ -4,6 +4,6 @@ DEBUG=True LOG_FILE="./summary.log" TEST_JSON="./data/test-events-elastic.json" -SUMMS=["gensim", "spacy"] +SUMMS=["spacy"]