From c9f93deb7443786beb21796d0b675b835eeb2fd1 Mon Sep 17 00:00:00 2001 From: andrew Date: Tue, 3 Nov 2020 19:49:16 -0800 Subject: [PATCH 1/2] Add multi signal function and tests --- .../covidcast-py/covidcast/covidcast.py | 24 ++++++++++++++++++ .../tests/covidcast/test_covidcast.py | 25 ++++++++++++++++++- 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/Python-packages/covidcast-py/covidcast/covidcast.py b/Python-packages/covidcast-py/covidcast/covidcast.py index dfcb709d..a85d17de 100644 --- a/Python-packages/covidcast-py/covidcast/covidcast.py +++ b/Python-packages/covidcast-py/covidcast/covidcast.py @@ -195,6 +195,30 @@ def signal(data_source: str, return out +def signals(data_source: Union[str, List[str]], + signals: Union[str, List[str]], # pylint: disable=W0621 + start_day: Union[date, List[date]] = None, + end_day: Union[date, List[date]] = None, + geo_type: str = "county", + geo_values: Union[str, Iterable[str]] = "*", + as_of: date = None, + issues: Union[date, Tuple[date], List[date]] = None, + lag: int = None) -> List[pd.DataFrame]: + input_params = [i for i in [data_source, signals, start_day, end_day] if isinstance(i, list)] + len_args = 1 if not input_params else len(input_params[0]) + if not all(len(i) == len_args for i in input_params): + raise ValueError("Input data_source, signal, start_day, and end_day must be single values " + "or the same length.") + data_source = data_source if isinstance(data_source, list) else [data_source]*len_args + signals = signals if isinstance(signals, list) else [signals]*len_args + start_day = start_day if isinstance(start_day, list) else [start_day]*len_args + end_day = end_day if isinstance(end_day, list) else [end_day]*len_args + output = [] + for source, sig, start, end in zip(data_source, signals, start_day, end_day): + output.append(signal(source, sig, start, end, geo_type, geo_values, as_of, issues, lag)) + return output + + def metadata() -> pd.DataFrame: """Fetch COVIDcast surveillance stream 
metadata. diff --git a/Python-packages/covidcast-py/tests/covidcast/test_covidcast.py b/Python-packages/covidcast-py/tests/covidcast/test_covidcast.py index dc536bf7..003b2600 100644 --- a/Python-packages/covidcast-py/tests/covidcast/test_covidcast.py +++ b/Python-packages/covidcast-py/tests/covidcast/test_covidcast.py @@ -1,6 +1,6 @@ import warnings from datetime import date, datetime -from unittest.mock import patch +from unittest.mock import patch, call # Force tests to use a specific backend, so they reproduce across platforms import matplotlib @@ -74,6 +74,29 @@ def test_signal(mock_covidcast, mock_metadata): start_day=date(2020, 4, 2), end_day=date(2020, 4, 1)) +@patch("covidcast.covidcast.signal") +def test_signals(signal): + signal.return_value = None + + # test two sources + output = covidcast.signals(["1", "2"], "signal") + calls = [call("1", "signal", None, None, "county", "*", None, None, None), + call("2", "signal", None, None, "county", "*", None, None, None)] + signal.assert_has_calls(calls) + assert output == [None]*2 + + # test two sources and two start days + output = covidcast.signals(["1", "2"], "signal", [date(2020, 1, 1), date(2020, 1, 2)]) + calls = [call("1", "signal", date(2020, 1, 1), None, "county", "*", None, None, None), + call("2", "signal", date(2020, 1, 2), None, "county", "*", None, None, None)] + signal.assert_has_calls(calls) + assert output == [None]*2 + + # test error if improper input args + with pytest.raises(ValueError): + covidcast.signals(["1", "2"], ["a", "b", "c"]) + + @patch("delphi_epidata.Epidata.covidcast_meta") def test_metadata(mock_covidcast_meta): # not generating full DF since most attributes used From 226a7ca1a04faf0138ea5c0f6e4523b3dee46668 Mon Sep 17 00:00:00 2001 From: andrew Date: Tue, 3 Nov 2020 20:09:16 -0800 Subject: [PATCH 2/2] Update documentation --- .../covidcast-py/covidcast/covidcast.py | 47 ++++++++++++++++++- Python-packages/covidcast-py/docs/signals.rst | 5 +- 2 files changed, 50 insertions(+), 2 
deletions(-) diff --git a/Python-packages/covidcast-py/covidcast/covidcast.py b/Python-packages/covidcast-py/covidcast/covidcast.py index a85d17de..0086dc1d 100644 --- a/Python-packages/covidcast-py/covidcast/covidcast.py +++ b/Python-packages/covidcast-py/covidcast/covidcast.py @@ -204,6 +204,49 @@ def signals(data_source: Union[str, List[str]], as_of: date = None, issues: Union[date, Tuple[date], List[date]] = None, lag: int = None) -> List[pd.DataFrame]: + """Download a list of multiple signals. + + Given one or more data sources, signals, start days, and end days, this function will + return a list of the desired signals by calling ``covidcast.signal()`` multiple times. + The output will use the input parameters element-wise. Because of this, the parameters + must either be an individual value or lists of the same length. For example, you can + provide two data sources and one start day, two data sources and two start days, but not + two data sources with three start days. + + :param data_source: String or list identifying the data source(s) to query, such as + ``"fb-survey"``. + :param signals: String or list identifying the signal(s) from that source to query, + such as ``"smoothed_cli"``. + :param start_day: Query data beginning on these dates. Provided as a single or list of + ``datetime.date`` objects. If ``start_day`` is ``None``, defaults to the + first day data is available for this signal. + :param end_day: Query data up to these dates, inclusive. Provided as a single or list of + ``datetime.date`` objects. If ``end_day`` is ``None``, defaults to the most + recent day data is available for this signal. + :param geo_type: The geography type for which to request this data, such as + ``"county"`` or ``"state"``. Available types are described in the + COVIDcast signal documentation. Defaults to ``"county"``. + :param geo_values: The geographies to fetch data for. The default, ``"*"``, + fetches all geographies.
To fetch one geography, specify its ID as a + string; multiple geographies can be provided as an iterable (list, tuple, + ...) of strings. + :param as_of: Fetch only data that was available on or before this date, + provided as a ``datetime.date`` object. If ``None``, the default, return + the most recent available data. + :param issues: Fetch only data that was published or updated ("issued") on + these dates. Provided as either a single ``datetime.date`` object, + indicating a single date to fetch data issued on, or a tuple or list + specifying (start, end) dates. In this case, return all data issued in + this range. There may be multiple rows for each observation, indicating + several updates to its value. If ``None``, the default, return the most + recently issued data. + :param lag: Integer. If, for example, ``lag=3``, fetch only data that was + published or updated exactly 3 days after the date. For example, a row + with ``time_value`` of June 3 will only be included in the results if its + data was issued or updated on June 6. If ``None``, the default, return the + most recently issued data regardless of its lag. + :returns: A list of the signal DataFrames. + """ input_params = [i for i in [data_source, signals, start_day, end_day] if isinstance(i, list)] len_args = 1 if not input_params else len(input_params[0]) if not all(len(i) == len_args for i in input_params): @@ -296,7 +339,9 @@ def metadata() -> pd.DataFrame: return meta_df -def aggregate_signals(signals: list, dt: list = None, join_type: str = "outer") -> pd.DataFrame: +def aggregate_signals(signals: list, # pylint: disable=W0621 + dt: list = None, + join_type: str = "outer") -> pd.DataFrame: """Given a list of DataFrames, [optionally] lag each one and join them into one DataFrame. 
This method takes a list of DataFrames containing signal information for diff --git a/Python-packages/covidcast-py/docs/signals.rst b/Python-packages/covidcast-py/docs/signals.rst index e254d618..71e5ad29 100644 --- a/Python-packages/covidcast-py/docs/signals.rst +++ b/Python-packages/covidcast-py/docs/signals.rst @@ -12,9 +12,12 @@ Pandas data frame. Detailed examples are provided in the :ref:`usage examples Sometimes you would like to work with multiple signals -- for example, to obtain several signals at every location, as part of building models of features at -each location. For convenience, the package provides a function to produce a +each location. For convenience, the package provides a function to retrieve multiple +signals in a list and a function to take a list of signals and produce a single data frame containing multiple signals at each location. +.. autofunction:: covidcast.signals + .. autofunction:: covidcast.aggregate_signals