diff --git a/docs/api/flusurv.md b/docs/api/flusurv.md index 3e6923bf2..79ddd15ef 100644 --- a/docs/api/flusurv.md +++ b/docs/api/flusurv.md @@ -54,22 +54,45 @@ If neither is specified, the current issues are used. ## Response -| Field | Description | Type | -|--------------------------|-----------------------------------------------------------------|------------------| -| `result` | result code: 1 = success, 2 = too many results, -2 = no results | integer | -| `epidata` | list of results | array of objects | -| `epidata[].release_date` | | string | -| `epidata[].location` | | string | -| `epidata[].issue` | | integer | -| `epidata[].epiweek` | | integer | -| `epidata[].lag` | | integer | -| `epidata[].rate_age_0` | | float | -| `epidata[].rate_age_1` | | float | -| `epidata[].rate_age_2` | | float | -| `epidata[].rate_age_3` | | float | -| `epidata[].rate_age_4` | | float | -| `epidata[].rate_overall` | | float | -| `message` | `success` or error message | string | +| Field | Description | Type | +|---|---|---| +| `result` | result code: 1 = success, 2 = too many results, -2 = no results | integer | +| `epidata` | list of results | array of objects | +| `epidata[].release_date` | the date when this record was first received by Delphi | string | +| `epidata[].location` | the name of the catchment (e.g. 'network_all', 'CA', 'NY_albany') | string | +| `epidata[].issue` | the epiweek of receipt by Delphi (e.g. 
issue 201453 includes epiweeks up to and including 2014w53, but not 2015w01 or following) | integer | +| `epidata[].epiweek` | the epiweek during which the data was collected | integer | +| `epidata[].lag` | number of weeks between `epiweek` and `issue` | integer | +| `epidata[].rate_age_0` | hospitalization rate for ages 0-4 | float | +| `epidata[].rate_age_1` | hospitalization rate for ages 5-17 | float | +| `epidata[].rate_age_2` | hospitalization rate for ages 18-49 | float | +| `epidata[].rate_age_3` | hospitalization rate for ages 50-64 | float | +| `epidata[].rate_age_4` | hospitalization rate for ages 65+ | float | +| `epidata[].rate_overall` | overall hospitalization rate | float | +| `epidata[].rate_age_5` | hospitalization rate for ages 65-74 | float | +| `epidata[].rate_age_6` | hospitalization rate for ages 75-84 | float | +| `epidata[].rate_age_7` | hospitalization rate for ages 85+ | float | +| `epidata[].rate_age_18t29` | hospitalization rate for ages 18 to 29 | float | +| `epidata[].rate_age_30t39` | hospitalization rate for ages 30 to 39 | float | +| `epidata[].rate_age_40t49` | hospitalization rate for ages 40 to 49 | float | +| `epidata[].rate_age_5t11` | hospitalization rate for ages 5 to 11 | float | +| `epidata[].rate_age_12t17` | hospitalization rate for ages 12 to 17 | float | +| `epidata[].rate_age_lt18` | hospitalization rate for ages <18 | float | +| `epidata[].rate_age_gte18` | hospitalization rate for ages >=18 | float | +| `epidata[].rate_age_0tlt1` | hospitalization rate for ages 0-1 | float | +| `epidata[].rate_age_1t4` | hospitalization rate for ages 1-4 | float | +| `epidata[].rate_age_gte75` | hospitalization rate for ages >=75 | float | +| `epidata[].rate_race_white` | hospitalization rate for white people | float | +| `epidata[].rate_race_black` | hospitalization rate for black people | float | +| `epidata[].rate_race_hisp` | hospitalization rate for Hispanic/Latino people | float | +| `epidata[].rate_race_asian` | 
hospitalization rate for Asian people | float | +| `epidata[].rate_race_natamer` | hospitalization rate for American Indian/Alaska Native people | float | +| `epidata[].rate_sex_male` | hospitalization rate for males | float | +| `epidata[].rate_sex_female` | hospitalization rate for females | float | +| `epidata[].rate_flu_a` | hospitalization rate for influenza A | float | +| `epidata[].rate_flu_b` | hospitalization rate for influenza B | float | +| `epidata[].season` | indicates the start and end years of the winter flu season in the format YYYY-YY (e.g. 2022-23 indicates the flu season running late 2022 through early 2023) | string | +| `message` | `success` or error message | string | Notes: * The `flusurv` age groups are, in general, not the same as the ILINet diff --git a/integrations/server/test_flusurv.py b/integrations/server/test_flusurv.py index 33f0f00b8..ef1f0978e 100644 --- a/integrations/server/test_flusurv.py +++ b/integrations/server/test_flusurv.py @@ -10,9 +10,54 @@ def localSetUp(self): def test_flusurv(self): """Basic integration test for flusurv endpoint""" + # TODO: Should test out the whole pipeline, where we mock the source + # API by pulling more realistic data from a local JSON. 
self.cur.execute( - "INSERT INTO `flusurv`(`release_date`, `issue`, `epiweek`, `location`, `lag`, `rate_age_0`, `rate_age_1`, `rate_age_2`, `rate_age_3`, `rate_age_4`, `rate_overall`, `rate_age_5`, `rate_age_6`, `rate_age_7`) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", - ("2012-11-02", "201243", "201143", "CA", "52", "0", "0", "0", "0.151", "0", "0.029", "0", "0", "0"), + """ + INSERT INTO `flusurv`( + `release_date`, + `issue`, + `epiweek`, + `location`, + `lag`, + `rate_age_0`, + `rate_age_1`, + `rate_age_2`, + `rate_age_3`, + `rate_age_4`, + `rate_overall`, + `rate_age_5`, + `rate_age_6`, + `rate_age_7`, + `rate_age_18t29`, + `rate_age_30t39`, + `rate_age_40t49`, + `rate_age_5t11`, + `rate_age_12t17`, + `rate_age_lt18`, + `rate_age_gte18`, + `rate_age_1t4`, + `rate_age_gte75`, + `rate_age_0tlt1`, + `rate_race_white`, + `rate_race_black`, + `rate_race_hisp`, + `rate_race_asian`, + `rate_race_natamer`, + `rate_sex_male`, + `rate_sex_female`, + `rate_flu_a`, + `rate_flu_b`, + `season` + ) VALUES( + %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, + %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s + ) + """, + ( + "2012-11-02", 201243, 201143, "CA", 52, 0, 0, 0, 0.151, 0, 0.029, 0, 0, 0, + 2.54, 0, 1, 0, 0, 0, 0, 0, 0.68, 0.46, 1, 1, 0, 0, 0, 99, 0, 0, 22.2, "2012-13" + ), ) self.cnx.commit() response = self.epidata_client.flusurv(epiweeks=201143, locations="CA") @@ -25,6 +70,7 @@ def test_flusurv(self): "location": "CA", "issue": 201243, "epiweek": 201143, + "season": "2012-13", "lag": 52, "rate_age_0": 0.0, "rate_age_1": 0.0, @@ -32,6 +78,28 @@ def test_flusurv(self): "rate_age_3": 0.151, "rate_age_4": 0.0, "rate_overall": 0.029, + "rate_age_5": 0.0, + "rate_age_6": 0.0, + "rate_age_7": 0.0, + "rate_age_18t29": 2.54, + "rate_age_30t39": 0.0, + "rate_age_40t49": 1.0, + "rate_age_5t11": 0.0, + "rate_age_12t17": 0.0, + "rate_age_lt18": 0.0, + "rate_age_gte18": 0.0, + "rate_age_1t4": 0.0, + "rate_age_gte75": 
0.68, + "rate_age_0tlt1": 0.46, + "rate_race_white": 1.0, + "rate_race_black": 1.0, + "rate_race_hisp": 0.0, + "rate_race_asian": 0.0, + "rate_race_natamer": 0.0, + "rate_sex_male": 99.0, + "rate_sex_female": 0.0, + "rate_flu_a": 0.0, + "rate_flu_b": 22.2, } ], "result": 1, diff --git a/src/acquisition/flusurv/api.py b/src/acquisition/flusurv/api.py new file mode 100644 index 000000000..dd602766a --- /dev/null +++ b/src/acquisition/flusurv/api.py @@ -0,0 +1,348 @@ +""" +=============== +=== Purpose === +=============== + +Fetches FluSurv-NET data (flu hospitalization rates) from CDC. Unlike the other +CDC-hosted datasets (e.g. FluView), FluSurv is not available as a direct +download. This program emulates web browser requests for the web app and +extracts data of interest from the JSON response. + +For unknown reasons, the server appears to provide two separate rates for any +given location, epiweek, and age group. These rates are usually identical--but +not always. When two given rates differ, the first is kept. This appears to be +the behavior of the web app, at the following location: + - https://gis.cdc.gov/GRASP/Fluview/FluView3References/Main/FluView3.js:859 + +See also: + - flusurv_update.py + - https://gis.cdc.gov/GRASP/Fluview/FluHospRates.html + - https://wwwnc.cdc.gov/eid/article/21/9/14-1912_article + - Chaves, S., Lynfield, R., Lindegren, M., Bresee, J., & Finelli, L. (2015). + The US Influenza Hospitalization Surveillance Network. Emerging Infectious + Diseases, 21(9), 1543-1550. https://dx.doi.org/10.3201/eid2109.141912. 
def fetch_json(path, payload, call_count=1, requests_impl=requests):
    """
    Send a request to the FluSurv server and return the parsed JSON response.

    Args:
        path: endpoint path, appended to `FLUSURV_BASE_URL`
        payload: JSON-serializable request body. If None, a GET request is
            sent; otherwise the payload is POSTed as JSON.
        call_count: 1-based number of the current attempt. A 500 response is
            retried while this is <= 2, i.e. at most three attempts are made.
        requests_impl: object exposing `get` and `post` in the style of the
            `requests` module; can be swapped out for testing.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: if the status code is not 200 (once the retries for
            status 500 are used up), or if the response is not served as
            JSON.
    """

    # it's polite to self-identify this "bot"
    delphi_url = "https://delphi.cmu.edu/index.html"
    user_agent = f"Mozilla/5.0 (compatible; delphibot/1.0; +{delphi_url})"

    # the FluSurv AMF server
    flusurv_url = FLUSURV_BASE_URL + path

    # request headers
    headers = {
        "Accept-Encoding": "gzip",
        "User-Agent": user_agent,
    }

    # send the request and read the response
    if payload is None:
        method = requests_impl.get
        data = None
    else:
        headers["Content-Type"] = "application/json;charset=UTF-8"
        method = requests_impl.post
        data = json.dumps(payload)
    resp = method(flusurv_url, headers=headers, data=data)

    # check the HTTP status code
    if resp.status_code == 500 and call_count <= 2:
        # the server often fails with this status, so wait and retry
        delay = 10 * call_count
        print(f"got status {int(resp.status_code)}, will retry in {int(delay)} sec...")
        time.sleep(delay)
        # Hand the injected implementation on to the retry; without this the
        # retry would silently fall back to the default `requests` module.
        return fetch_json(
            path,
            payload,
            call_count=call_count + 1,
            requests_impl=requests_impl,
        )
    elif resp.status_code != 200:
        raise Exception(["status code != 200", resp.status_code])

    # check response mime type
    if "application/json" not in resp.headers.get("Content-Type", ""):
        raise Exception("response is not json")

    # return the decoded json object
    return resp.json()
def mmwrid_to_epiweek(mmwrid):
    """Translate a CDC week index (MMWR id) into the matching epiweek."""

    # MMWR ids are sequential week counters, so an id is converted by
    # measuring its distance from a known anchor week. The anchor is 2003w40
    # (MMWR id 2179), the earliest date we see in the returned data. The
    # index-1 week on this scale is the first week of 1962.
    anchor_mmwrid = 2179
    anchor_date = EpiDate(2003, 9, 28)
    weeks_from_anchor = mmwrid - anchor_mmwrid
    return anchor_date.add_weeks(weeks_from_anchor).get_ew()
class FlusurvMetadata:
    """
    Snapshot of the FluSurv metadata response plus lookups derived from it.

    Attributes set during construction:
        max_age_weeks: seasons whose last week is at least this many weeks
            older than the current issue are ignored
        id_to_group: `ID_TO_LABEL_MAP` (strata ids -> output column suffixes)
        metadata: the raw JSON metadata response
        issue: epiweek of the "loaddatetime" reported by the server
        seasonids: set of season ids recent enough to be fetched
        location_to_code: location abbreviation -> (networkid, catchmentid)
        locations: keys view of `location_to_code`
        id_to_season: seasonid -> season label; None for unknown season ids
    """

    def __init__(self, max_age_weeks):
        self.max_age_weeks = max_age_weeks
        self.id_to_group = ID_TO_LABEL_MAP

        self._fetch_flusurv_metadata()

        # `_set_recent_seasonids` reads `self.issue`, so the issue has to be
        # determined first.
        self._determine_issue()
        self._set_recent_seasonids()
        self._make_location_to_code_map()
        self._make_id_season_map()

    def _fetch_flusurv_metadata(self):
        """Fetch the FluSurv JSON metadata object and store it as `self.metadata`."""
        self.metadata = fetch_json(
            "PostPhase03DataTool",
            {"appversion": "Public", "key": "", "injson": []}
        )

    def _location_name_to_abbr(self, geo, network):
        """Find short geo name corresponding to a geo and network"""
        if geo == "Entire Network":
            return MAP_ENTIRE_NETWORK_NAMES[network]
        else:
            return MAP_REGION_NAMES_TO_ABBR[geo]

    def _make_location_to_code_map(self):
        """
        Create a map for all currently available FluSurv locations from names to codes

        Sets `self.location_to_code` and `self.locations`.

        Raises:
            Exception: if two catchments resolve to the same short name
        """
        location_to_code = dict()
        for location in self.metadata["catchments"]:
            # "area" is the long-form region (California, etc), and "name" is
            # the network/data source type (IHSP, EIP, etc)
            location_name = self._location_name_to_abbr(location["area"], location["name"])
            if location_name in location_to_code:
                raise Exception(
                    f"catchment {location_name} already seen, but " +
                    "we expect catchments to be unique"
                )

            location_to_code[location_name] = (
                int(location["networkid"]), int(location["catchmentid"])
            )
        self.location_to_code = location_to_code
        self.locations = self.location_to_code.keys()

    def _determine_issue(self):
        """
        Extract the current issue from the stored FluSurv metadata.

        Reads `self.metadata["loaddatetime"]` and stores the corresponding
        epiweek as `self.issue`.

        Note: For each FluSurv API pull, only one issue date is listed. For
        time values and locations we've seen before, we can't differentiate
        which values were updated and which are the same.
        """
        # extract
        date = datetime.strptime(self.metadata["loaddatetime"], "%b %d, %Y")

        # convert
        self.issue = EpiDate(date.year, date.month, date.day).get_ew()

    def _set_recent_seasonids(self):
        """Store the ids of all seasons that are recent enough as `self.seasonids`."""
        # Ignore seasons with all dates older than `self.max_age_weeks` (from
        # user command line argument `max_age`)
        self.seasonids = {
            season_blob["seasonid"] for season_blob in self.metadata["seasons"]
            if delta_epiweeks(mmwrid_to_epiweek(season_blob["endweek"]), self.issue) < self.max_age_weeks
        }

    def _make_id_season_map(self):
        """
        Create a map from seasonid to season description, in the format "YYYY-YY"

        The result is stored as `self.id_to_season`. Looking up a season id
        that is not listed in the metadata gives None.
        """
        # Values are plain strings, so the fallback for an unknown id is None
        # (not a nested mapping).
        id_to_season = defaultdict(lambda: None)
        for season in self.metadata["seasons"]:
            id_to_season[season["seasonid"]] = season["label"].strip()

        self.id_to_season = id_to_season
class FlusurvLocationFetcher:
    """Fetch FluSurv rates for single locations and regroup them by epiweek."""

    def __init__(self, max_age_weeks):
        self.metadata = FlusurvMetadata(max_age_weeks)

    def get_data(self, location):
        """
        Fetch and parse flu data for a given location.

        This method performs the following operations:
        - fetch location-specific FluSurv data from CDC API
        - extracts and returns hospitalization rates for each epiweek

        Args:
            location: a key of `self.metadata.location_to_code`

        Returns the epiweek-grouped dictionary built by `_group_by_epiweek`.
        """
        # fetch
        print("[fetching flusurv data...]")
        data_in = self._fetch_flusurv_location(location)

        # extract
        print("[reformatting flusurv result...]")
        data_out = self._group_by_epiweek(data_in)

        # return
        print(f"[successfully fetched data for {location}]")
        return data_out

    def _fetch_flusurv_location(self, location):
        """
        Return FluSurv JSON object for a given location.

        One entry per season id in `self.metadata.seasonids` is requested.
        If the API reports no data, a warning is issued and
        `{"default_data": []}` is returned instead of the raw response.
        """
        network_id, catchment_id = self.metadata.location_to_code[location]

        result = fetch_json(
            "PostPhase03DataTool",
            {
                "appversion": "Public",
                "key": "getdata",
                "injson": [
                    {
                        "networkid": network_id,
                        "catchmentid": catchment_id,
                        "seasonid": seasonid,
                    } for seasonid in self.metadata.seasonids],
            },
        )

        # If no data is returned (a given seasonid is not reported,
        # location codes are invalid, etc), the API returns a JSON like:
        #   {
        #       'default_data': {
        #           'response': 'No Data'
        #       }
        #   }
        #
        # If data is returned, then result["default_data"] is a list
        # and result["default_data"]["response"] doesn't exist.
        default_data = result["default_data"]
        no_data = len(default_data) == 0 or (
            isinstance(default_data, dict)
            and default_data.get("response") == "No Data"
        )
        if no_data:
            warn(f"warning: No data was returned from the API for {location}")
            # Return empty obs with right format to avoid downstream errors
            return {"default_data": []}

        return result

    def _group_by_epiweek(self, data):
        """
        Convert default data for a single location into an epiweek-grouped dictionary

        Args:
            data: a response object with a "default_data" element, as
                returned by `_fetch_flusurv_location`

        Returns a dictionary of the format
            {
                <epiweek>: {
                    "season": <season description>,
                    <group name>: <weekly rate>,
                    ...
                },
                ...
            }
        """
        observations = data["default_data"]

        # Create output object
        # First layer of keys is epiweeks. Second layer of keys is groups
        # (by output column name, e.g. "rate_age_0"), plus "season".
        #
        # If a top-level key doesn't already exist, create a new empty dict.
        # If a secondary key doesn't already exist, create a new key with a
        # default value of None.
        data_out = defaultdict(lambda: defaultdict(lambda: None))

        # `observations` is a list of dictionaries, with the format
        #   [
        #       {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'flutype': 0, 'rate': 4.3, 'weeklyrate': 1.7, 'mmwrid': 2493},
        #       {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'flutype': 0, 'rate': 20.3, 'weeklyrate': 0.1, 'mmwrid': 2513},
        #       {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'flutype': 0, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516},
        #       ...
        #   ]
        for obs in observations:
            epiweek = mmwrid_to_epiweek(obs["mmwrid"])
            groupname = self._groupid_to_name(
                ageid=obs["ageid"], sexid=obs["sexid"],
                raceid=obs["raceid"], fluid=obs["flutype"]
            )

            # Set season description. This will be overwritten every iteration,
            # but should always have the same value per epiweek group.
            data_out[epiweek]["season"] = self.metadata.id_to_season[obs["seasonid"]]

            rate = obs["weeklyrate"]
            prev_rate = data_out[epiweek][groupname]
            if prev_rate is None:
                # This is the first time to see a rate for this epiweek-group
                # combo
                data_out[epiweek][groupname] = rate
            elif prev_rate != rate:
                # Skip and warn; a different rate was already found for this
                # epiweek-group combo
                warn((f"warning: Multiple rates seen for {epiweek} "
                      f"{groupname}, but previous value {prev_rate} does not "
                      f"equal new value {rate}. Using the first value."))

        print(f"found data for {len(data_out)} epiweeks")

        return data_out

    def _groupid_to_name(self, ageid, sexid, raceid, fluid):
        """
        Build the output column name for a combination of strata ids.

        At most one of the four ids may be non-zero. All zeros means the
        overall rate; otherwise the non-zero id selects the stratum.

        Returns "rate_overall", or "rate_<kind>_<suffix>" where <kind> is one
        of "age", "sex", "race" or "flu".

        Raises:
            ValueError: if more than one id is non-zero, or if `ageid` is 6
        """
        ids = (ageid, sexid, raceid, fluid)
        zero_count = ids.count(0)
        if zero_count < 3:
            raise ValueError("Expect at least three of four group ids to be 0")
        if zero_count == 4:
            return "rate_overall"

        if ageid == 6:
            # Ageid of 6 used to be used for the "overall" category.
            # Now "overall" is represented by a valueid of 0, and ageid of 6
            # is not used for any group. If we see an ageid of 6, something
            # has gone wrong.
            raise ValueError("Ageid cannot be 6; please check for changes in the API")

        # Exactly one id is non-zero from here on. In all cases, if the id is
        # not available as a key in the dict, use the raw id as the name
        # suffix.
        id_to_group = self.metadata.id_to_group
        if ageid != 0:
            group = "age_" + id_to_group["Age"].get(ageid, str(ageid))
        elif sexid != 0:
            group = "sex_" + id_to_group["Sex"].get(sexid, str(sexid))
        elif raceid != 0:
            group = "race_" + id_to_group["Race"].get(raceid, str(raceid))
        else:
            group = "flu_" + id_to_group["Flutype"].get(fluid, str(fluid))

        return "rate_" + group
+ 'master_lookup' = [ + {'Variable': 'Age', 'valueid': 1, 'parentid': 97, 'Label': '0-4 yr', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 2, 'parentid': 97, 'Label': '5-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 3, 'parentid': 98, 'Label': '18-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 4, 'parentid': 98, 'Label': '50-64 yr', 'Color_HexValue': '#516889', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 5, 'parentid': 98, 'Label': '65+ yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 7, 'parentid': 5, 'Label': '65-74 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 8, 'parentid': 5, 'Label': '75-84 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 9, 'parentid': 5, 'Label': '85+', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 10, 'parentid': 3, 'Label': '18-29 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 11, 'parentid': 3, 'Label': '30-39 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 12, 'parentid': 3, 'Label': '40-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 21, 'parentid': 2, 'Label': '5-11 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 22, 'parentid': 2, 'Label': '12-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 97, 'parentid': 0, 'Label': '< 18', 'Color_HexValue': '#000000', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 98, 'parentid': 0, 'Label': '>= 18', 'Color_HexValue': '#000000', 'Enabled': True}, + + {'Variable': 'Race', 'valueid': 1, 'parentid': None, 'Label': 'White', 'Color_HexValue': '#516889', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 2, 'parentid': None, 'Label': 'Black', 
'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 3, 'parentid': None, 'Label': 'Hispanic/Latino', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 4, 'parentid': None, 'Label': 'Asian/Pacific Islander', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 5, 'parentid': None, 'Label': 'American Indian/Alaska Native', 'Color_HexValue': '#007d8e', 'Enabled': True}, + + {'Variable': 'Sex', 'valueid': 1, 'parentid': None, 'Label': 'Male', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Sex', 'valueid': 2, 'parentid': None, 'Label': 'Female', 'Color_HexValue': '#F2775F', 'Enabled': True}, + + {'Variable': None, 'valueid': 0, 'parentid': 0, 'Label': 'Overall', 'Color_HexValue': '#000000', 'Enabled': True}, + ] + +All 23 strata are available starting epiweek 200935, inclusive. + +The previous version of the GRASP API +(https://gis.cdc.gov/GRASP/Flu3/GetPhase03InitApp) used the following age-id +mapping, as described in the top-level "ages" element, below. See +`./reference/old_grasp_result.json` for a full example response. + 'ages' = [ + {'label': '0-4 yr', 'ageid': 1, 'color_hexvalue': '#1B9E77'}, + {'label': '5-17 yr', 'ageid': 2, 'color_hexvalue': '#D95F02'}, + {'label': '18-49 yr', 'ageid': 3, 'color_hexvalue': '#4A298B'}, + {'label': '50-64 yr', 'ageid': 4, 'color_hexvalue': '#E7298A'}, + {'label': '65+ yr', 'ageid': 5, 'color_hexvalue': '#6AA61E'}, + {'label': 'Overall', 'ageid': 6, 'color_hexvalue': '#000000'}, + {'label': '65-74 yr', 'ageid': 7, 'color_hexvalue': '#A6CEE3'}, + {'label': '75-84 yr', 'ageid': 8, 'color_hexvalue': '#CAB2D6'}, + {'label': '85+', 'ageid': 9, 'color_hexvalue': '#1f78b4'} + ] + +In addition to the new age, race, and sex breakdowns, the group id for overall +reporting has changed from 6 to 0. Age ids 1-5 and 7-9 retain the +same meanings; age id 6 is not reported. 
+""" +HISTORICAL_GROUPS = ( + "rate_overall", + + "rate_age_0", + "rate_age_1", + "rate_age_2", + "rate_age_3", + "rate_age_4", + "rate_age_5", + "rate_age_6", + "rate_age_7", +) +NEW_AGE_GROUPS = ( + "rate_age_18t29", + "rate_age_30t39", + "rate_age_40t49", + "rate_age_5t11", + "rate_age_12t17", + "rate_age_lt18", + "rate_age_gte18", + "rate_age_1t4", + "rate_age_gte75", + "rate_age_0tlt1", +) +RACE_GROUPS = ( + "rate_race_white", + "rate_race_black", + "rate_race_hisp", + "rate_race_asian", + "rate_race_natamer", +) +SEX_GROUPS = ( + "rate_sex_male", + "rate_sex_female", +) +FLU_GROUPS = ( + "rate_flu_a", + "rate_flu_b", +) +EXPECTED_GROUPS = HISTORICAL_GROUPS + NEW_AGE_GROUPS + RACE_GROUPS + SEX_GROUPS + FLU_GROUPS + + +# dict(Variable: dict(valueid: output_col_suffix)) +ID_TO_LABEL_MAP = { + "Age": { + # The column names used in the DB for the original age groups + # are ordinal, such that: + # "rate_age_0" corresponds to age group 1, 0-4 yr + # "rate_age_1" corresponds to age group 2, 5-17 yr + # "rate_age_2" corresponds to age group 3, 18-49 yr + # "rate_age_3" corresponds to age group 4, 50-64 yr + # "rate_age_4" corresponds to age group 5, 65+ yr + # "rate_age_5" corresponds to age group 7, 65-74 yr + # "rate_age_6" corresponds to age group 8, 75-84 yr + # "rate_age_7" corresponds to age group 9, 85+ yr + # + # Group 6 was the "overall" category and not included in the + # ordinal naming scheme. Because of that, groups 1-5 have column + # ids equal to the ageid - 1; groups 7-9 have column ids equal + # to ageid - 2. + # + # Ageid of 6 used to be used for the "overall" category. + # Now "overall" is represented by a valueid of 0, and ageid of 6 + # is not used for any group. If we see an ageid of 6, something + # has gone wrong. 
+ 1: "0", # 'Label': '0-4 yr' + 2: "1", # 'Label': '5-17 yr' + 3: "2", # 'Label': '18-49 yr' + 4: "3", # 'Label': '50-64 yr' + 5: "4", # 'Label': '65+ yr' + 7: "5", # 'Label': '65-74 yr' + 8: "6", # 'Label': '75-84 yr' + 9: "7", # 'Label': '85+' + 10: "18t29", # 'Label': '18-29 yr' + 11: "30t39", # 'Label': '30-39 yr' + 12: "40t49", # 'Label': '40-49 yr' + 13: "0tlt1", # 'Label': '0-< 1 yr' + 14: "1t4", # 'Label': '1-4 yr', + 15: "gte75", # 'Label': '>= 75', + 21: "5t11", # 'Label': '5-11 yr' + 22: "12t17", # 'Label': '12-17 yr' + 97: "lt18", # 'Label': '< 18' + 98: "gte18", # 'Label': '>= 18' + }, + "Race": { + 1: "white", # 'Label': 'White' + 2: "black", # 'Label': 'Black' + 3: "hisp", # 'Label': 'Hispanic/Latino' + 4: "asian", # 'Label': 'Asian/Pacific Islander' + 5: "natamer", # 'Label': 'American Indian/Alaska Native' + }, + "Sex": { + 1: "male", # 'Label': 'Male' + 2: "female", # 'Label': 'Female' + }, + "Flutype": { + 1: "a", # 'Label': 'Influenza A' + 2: "b", # 'Label': 'Influenza B' + }, + # Unused. Leaving here for documentation's sake. 
+ "Overall": { + 0: "overall", # 'Label': 'Overall' + }, +} + + + +MAX_AGE_TO_CONSIDER_WEEKS = 52 + + +gmpr = GeoMapper() +map_state_names = gmpr.get_crosswalk("state", "state") +map_state_names = map_state_names.to_dict(orient = "records") +map_state_names = {elem["state_name"]: elem["state_id"].upper() for elem in map_state_names} + +map_nonstandard_names = {"New York - Albany": "NY_albany", "New York - Rochester": "NY_rochester"} + +MAP_REGION_NAMES_TO_ABBR = {**map_state_names, **map_nonstandard_names} + +MAP_ENTIRE_NETWORK_NAMES = { + "FluSurv-NET": "network_all", + "EIP": "network_eip", + "IHSP": "network_ihsp" +} + + +FLUSURV_BASE_URL = "https://gis.cdc.gov/GRASP/Flu3/" diff --git a/src/acquisition/flusurv/flusurv.py b/src/acquisition/flusurv/flusurv.py deleted file mode 100644 index 28105d933..000000000 --- a/src/acquisition/flusurv/flusurv.py +++ /dev/null @@ -1,219 +0,0 @@ -""" -=============== -=== Purpose === -=============== - -Fetches FluSurv-NET data (flu hospitaliation rates) from CDC. Unlike the other -CDC-hosted datasets (e.g. FluView), FluSurv is not available as a direct -download. This program emulates web browser requests for the web app and -extracts data of interest from the JSON response. - -For unknown reasons, the server appears to provide two separate rates for any -given location, epiweek, and age group. These rates are usually identical--but -not always. When two given rates differ, the first is kept. This appears to be -the behavior of the web app, at the following location: - - https://gis.cdc.gov/GRASP/Fluview/FluView3References/Main/FluView3.js:859 - -See also: - - flusurv_update.py - - https://gis.cdc.gov/GRASP/Fluview/FluHospRates.html - - https://wwwnc.cdc.gov/eid/article/21/9/14-1912_article - - Chaves, S., Lynfield, R., Lindegren, M., Bresee, J., & Finelli, L. (2015). - The US Influenza Hospitalization Surveillance Network. Emerging Infectious - Diseases, 21(9), 1543-1550. https://dx.doi.org/10.3201/eid2109.141912. 
- - -================= -=== Changelog === -================= - -2017-05-22 - * rewrite for new data source -2017-02-17 - * handle discrepancies by prefering more recent values -2017-02-03 - + initial version -""" - -# standard library -from datetime import datetime -import json -import time - -# third party -import requests - -# first party -from delphi.utils.epidate import EpiDate - - -# all currently available FluSurv locations and their associated codes -# the number pair represents NetworkID and CatchmentID -location_codes = { - "CA": (2, 1), - "CO": (2, 2), - "CT": (2, 3), - "GA": (2, 4), - "IA": (3, 5), - "ID": (3, 6), - "MD": (2, 7), - "MI": (3, 8), - "MN": (2, 9), - "NM": (2, 11), - "NY_albany": (2, 13), - "NY_rochester": (2, 14), - "OH": (3, 15), - "OK": (3, 16), - "OR": (2, 17), - "RI": (3, 18), - "SD": (3, 19), - "TN": (2, 20), - "UT": (3, 21), - "network_all": (1, 22), - "network_eip": (2, 22), - "network_ihsp": (3, 22), -} - - -def fetch_json(path, payload, call_count=1, requests_impl=requests): - """Send a request to the server and return the parsed JSON response.""" - - # it's polite to self-identify this "bot" - delphi_url = "https://delphi.cmu.edu/index.html" - user_agent = f"Mozilla/5.0 (compatible; delphibot/1.0; +{delphi_url})" - - # the FluSurv AMF server - flusurv_url = "https://gis.cdc.gov/GRASP/Flu3/" + path - - # request headers - headers = { - "Accept-Encoding": "gzip", - "User-Agent": user_agent, - } - if payload is not None: - headers["Content-Type"] = "application/json;charset=UTF-8" - - # send the request and read the response - if payload is None: - method = requests_impl.get - data = None - else: - method = requests_impl.post - data = json.dumps(payload) - resp = method(flusurv_url, headers=headers, data=data) - - # check the HTTP status code - if resp.status_code == 500 and call_count <= 2: - # the server often fails with this status, so wait and retry - delay = 10 * call_count - print(f"got status {int(resp.status_code)}, will 
retry in {int(delay)} sec...") - time.sleep(delay) - return fetch_json(path, payload, call_count=call_count + 1) - elif resp.status_code != 200: - raise Exception(["status code != 200", resp.status_code]) - - # check response mime type - if "application/json" not in resp.headers.get("Content-Type", ""): - raise Exception("response is not json") - - # return the decoded json object - return resp.json() - - -def fetch_flusurv_object(location_code): - """Return decoded FluSurv JSON object for the given location.""" - return fetch_json( - "PostPhase03GetData", - { - "appversion": "Public", - "networkid": location_code[0], - "cacthmentid": location_code[1], - }, - ) - - -def mmwrid_to_epiweek(mmwrid): - """Convert a CDC week index into an epiweek.""" - - # Add the difference in IDs, which are sequential, to a reference epiweek, - # which is 2003w40 in this case. - epiweek_200340 = EpiDate(2003, 9, 28) - mmwrid_200340 = 2179 - return epiweek_200340.add_weeks(mmwrid - mmwrid_200340).get_ew() - - -def extract_from_object(data_in): - """ - Given a FluSurv data object, return hospitaliation rates. - - The returned object is indexed first by epiweek, then by zero-indexed age - group. 
- """ - - # an object to hold the result - data_out = {} - - # iterate over all seasons and age groups - for obj in data_in["busdata"]["dataseries"]: - if obj["age"] in (10, 11, 12): - # TODO(https://github.com/cmu-delphi/delphi-epidata/issues/242): - # capture as-of-yet undefined age groups 10, 11, and 12 - continue - age_index = obj["age"] - 1 - # iterage over weeks - for mmwrid, _, _, rate in obj["data"]: - epiweek = mmwrid_to_epiweek(mmwrid) - if epiweek not in data_out: - # weekly rate of each age group - data_out[epiweek] = [None] * 9 - prev_rate = data_out[epiweek][age_index] - if prev_rate is None: - # this is the first time to see a rate for this epiweek/age - data_out[epiweek][age_index] = rate - elif prev_rate != rate: - # a different rate was already found for this epiweek/age - format_args = (epiweek, obj["age"], prev_rate, rate) - print("warning: %d %d %f != %f" % format_args) - - # sanity check the result - if len(data_out) == 0: - raise Exception("no data found") - - # print the result and return flu data - print(f"found data for {len(data_out)} weeks") - return data_out - - -def get_data(location_code): - """ - Fetch and parse flu data for the given location. 
- - This method performs the following operations: - - fetches FluSurv data from CDC - - extracts and returns hospitaliation rates - """ - - # fetch - print("[fetching flusurv data...]") - data_in = fetch_flusurv_object(location_code) - - # extract - print("[extracting values...]") - data_out = extract_from_object(data_in) - - # return - print("[scraped successfully]") - return data_out - - -def get_current_issue(): - """Scrape the current issue from the FluSurv main page.""" - - # fetch - data = fetch_json("GetPhase03InitApp?appVersion=Public", None) - - # extract - date = datetime.strptime(data["loaddatetime"], "%b %d, %Y") - - # convert and return - return EpiDate(date.year, date.month, date.day).get_ew() diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index 1aa8e9885..78ed37149 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -19,42 +19,9 @@ === Data Dictionary === ======================= -`flusurv` is the table where US flu hospitalization rates are stored. 
-+--------------+-------------+------+-----+---------+----------------+ -| Field | Type | Null | Key | Default | Extra | -+--------------+-------------+------+-----+---------+----------------+ -| id | int(11) | NO | PRI | NULL | auto_increment | -| release_date | date | NO | MUL | NULL | | -| issue | int(11) | NO | MUL | NULL | | -| epiweek | int(11) | NO | MUL | NULL | | -| location | varchar(32) | NO | MUL | NULL | | -| lag | int(11) | NO | MUL | NULL | | -| rate_age_0 | double | YES | | NULL | | -| rate_age_1 | double | YES | | NULL | | -| rate_age_2 | double | YES | | NULL | | -| rate_age_3 | double | YES | | NULL | | -| rate_age_4 | double | YES | | NULL | | -| rate_overall | double | YES | | NULL | | -| rate_age_5 | double | YES | | NULL | | -| rate_age_6 | double | YES | | NULL | | -| rate_age_7 | double | YES | | NULL | | -+--------------+-------------+------+-----+---------+----------------+ -id: unique identifier for each record -release_date: the date when this record was first published by the CDC -issue: the epiweek of publication (e.g. issue 201453 includes epiweeks up to - and including 2014w53, but not 2015w01 or following) -epiweek: the epiweek during which the data was collected -location: the name of the catchment (e.g. 'network_all', 'CA', 'NY_albany') -lag: number of weeks between `epiweek` and `issue` -rate_age_0: hospitalization rate for ages 0-4 -rate_age_1: hospitalization rate for ages 5-17 -rate_age_2: hospitalization rate for ages 18-49 -rate_age_3: hospitalization rate for ages 50-64 -rate_age_4: hospitalization rate for ages 65+ -rate_overall: overall hospitalization rate -rate_age_5: hospitalization rate for ages 65-74 -rate_age_6: hospitalization rate for ages 75-84 -rate_age_7: hospitalization rate for ages 85+ +US flu hospitalization rates are stored in the `flusurv` table. See +`src/ddl/fluview.sql` for the `flusurv` schema. See `docs/api/flusurv.md` for +field descriptions. 
================= === Changelog === @@ -70,15 +37,17 @@ # standard library import argparse +from warnings import warn # third party import mysql.connector # first party -from delphi.epidata.acquisition.flusurv import flusurv import delphi.operations.secrets as secrets from delphi.utils.epidate import EpiDate from delphi.utils.epiweek import delta_epiweeks +from .api import FlusurvLocationFetcher +from .constants import (MAX_AGE_TO_CONSIDER_WEEKS, EXPECTED_GROUPS) def get_rows(cur): @@ -90,17 +59,13 @@ def get_rows(cur): return num -def update(issue, location_name, test_mode=False): - """Fetch and store the currently avialble weekly FluSurv dataset.""" - - # fetch data - location_code = flusurv.location_codes[location_name] - print("fetching data for", location_name, location_code) - data = flusurv.get_data(location_code) +def update(fetcher, location, test_mode=False): + """Fetch and store the currently available weekly FluSurv dataset.""" + # Fetch location-specific data + data = fetcher.get_data(location) # metadata epiweeks = sorted(data.keys()) - location = location_name release_date = str(EpiDate.today()) # connect to the database @@ -111,40 +76,77 @@ def update(issue, location_name, test_mode=False): print(f"rows before: {int(rows1)}") # SQL for insert/update - sql = """ + nonrelease_fields = ("issue", "epiweek", "location", "lag", "season") + EXPECTED_GROUPS + other_field_names = ", ".join( + f"`{name}`" for name in nonrelease_fields + ) + other_field_values = ", ".join( + f"%({name})s" for name in nonrelease_fields + ) + # Updates on duplicate key only for release date + signal fields, not metadata. 
+ other_field_coalesce = ", ".join( + f"`{name}` = coalesce(%({name})s, `{name}`)" for name in EXPECTED_GROUPS + ) + + sql = f""" INSERT INTO `flusurv` ( - `release_date`, `issue`, `epiweek`, `location`, `lag`, `rate_age_0`, - `rate_age_1`, `rate_age_2`, `rate_age_3`, `rate_age_4`, `rate_overall`, - `rate_age_5`, `rate_age_6`, `rate_age_7` + `release_date`, + {other_field_names} ) VALUES ( - %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s + %(release_date)s, + {other_field_values} ) ON DUPLICATE KEY UPDATE - `release_date` = least(`release_date`, %s), - `rate_age_0` = coalesce(%s, `rate_age_0`), - `rate_age_1` = coalesce(%s, `rate_age_1`), - `rate_age_2` = coalesce(%s, `rate_age_2`), - `rate_age_3` = coalesce(%s, `rate_age_3`), - `rate_age_4` = coalesce(%s, `rate_age_4`), - `rate_overall` = coalesce(%s, `rate_overall`), - `rate_age_5` = coalesce(%s, `rate_age_5`), - `rate_age_6` = coalesce(%s, `rate_age_6`), - `rate_age_7` = coalesce(%s, `rate_age_7`) + `release_date` = least(`release_date`, %(release_date)s), + {other_field_coalesce} """ # insert/update each row of data (one per epiweek) for epiweek in epiweeks: - lag = delta_epiweeks(epiweek, issue) - if lag > 52: - # Ignore values older than one year, as (1) they are assumed not to - # change, and (2) it would adversely affect database performance if all - # values (including duplicates) were stored on each run. + lag = delta_epiweeks(epiweek, fetcher.metadata.issue) + if lag > fetcher.metadata.max_age_weeks: + # Ignore obs older than `max_age_weeks` from user command line + # argument `max_age`. We restricted our FluSurv API call to only + # seasons within the last `max_age_weeks` weeks (in order to reduce the + # amount of data being pulled), but the season by the cutoff date + # may still have some weeks older than we'd like to include. 
+ continue - args_meta = [release_date, issue, epiweek, location, lag] - args_insert = data[epiweek] - args_update = [release_date] + data[epiweek] - cur.execute(sql, tuple(args_meta + args_insert + args_update)) + + missing_expected_groups = EXPECTED_GROUPS - data[epiweek].keys() + # Remove the season description since we also store it in each epiweek obj + unexpected_groups = data[epiweek].keys() - EXPECTED_GROUPS - {"season"} + if len(missing_expected_groups) != 0: + warn( + f"{location} {epiweek} data is missing group(s) {missing_expected_groups}" + ) + # Fill in expected values with `None` so SQL parameter substitution below + # doesn't fail with a key error. + for key in missing_expected_groups: + data[epiweek][key] = None + if len(unexpected_groups) != 0: + warn( + f"{location} {epiweek} data includes new group(s) {unexpected_groups}" + ) + # Remove unexpected values from the data. Construction of the SQL + # query below fetches values by key, so these would be ignored even + # if we left them. + for key in unexpected_groups: + del data[epiweek][key] + + args_meta = { + # the date when this record was first received by Delphi + "release_date": release_date, + # the epiweek of receipt by Delphi (e.g. issue 201453 includes + # epiweeks up to and including 2014w53, but not 2015w01 or + # following) + "issue": fetcher.metadata.issue, + # the epiweek during which the data was collected + "epiweek": epiweek, + "location": location, + "lag": lag + } + cur.execute(sql, {**args_meta, **data[epiweek]}) # commit and disconnect rows2 = get_rows(cur) @@ -163,8 +165,15 @@ def main(): # fmt: off parser.add_argument( "location", + type=str, help='location for which data should be scraped (e.g. 
"CA" or "all")' ) + parser.add_argument( + "--max-age", + default=MAX_AGE_TO_CONSIDER_WEEKS, + type=int, + help="age in weeks of data to ingest" + ) parser.add_argument( "--test", "-t", @@ -175,18 +184,19 @@ def main(): # fmt: on args = parser.parse_args() - # scrape current issue from the main page - issue = flusurv.get_current_issue() - print(f"current issue: {int(issue)}") + fetcher = FlusurvLocationFetcher(args.max_age) + print(f"current issue: {int(fetcher.metadata.issue)}") # fetch flusurv data if args.location == "all": # all locations - for location in flusurv.location_codes.keys(): - update(issue, location, args.test) + for location in fetcher.metadata.locations: + update(fetcher, location, args.test) else: # single location - update(issue, args.location, args.test) + if (args.location not in fetcher.metadata.locations): + raise KeyError("Requested location {args.location} not available") + update(fetcher, args.location, args.test) if __name__ == "__main__": diff --git a/src/acquisition/flusurv/reference/new_grasp_location_result.json b/src/acquisition/flusurv/reference/new_grasp_location_result.json new file mode 100644 index 000000000..9b0588fce --- /dev/null +++ b/src/acquisition/flusurv/reference/new_grasp_location_result.json @@ -0,0 +1,38 @@ +### The JSON format is invalid as-is. To be able to read these reference files +### into python, first remove the comments. +# +### New location-specific API response from https://gis.cdc.gov/GRASP/Flu3/PostPhase03DataTool?appVersion=Public +# +# with payload +# {"appversion": "Public", "key": "getdata", "injson": [ +# {"networkid": 1, "catchmentid": 22, "seasonid": 30 }, +# {"networkid": 1, "catchmentid": 22, "seasonid": 49} +# ]} +# The "seasonid" parameter is required. To fetch all historical data, +# each season must be listed separately. Seasonids that don't exist don't +# cause errors, they are simply ignored. 
+# +# However, if no listed seasonids exist for the specified location, then +# the returned JSON is: +# { +# 'default_data': { +# 'response': 'No Data' +# } +# } +{ + # Data!! Format is list of dicts. Each obs' fields are + # labelled, so we could easily convert this to dataframe. + 'default_data': [ + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 4.3, 'weeklyrate': 1.7, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.3, 'weeklyrate': 0.1, 'mmwrid': 2513}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.7, 'weeklyrate': 0.0, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 11.6, 'weeklyrate': 3.6, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 39.6, 'weeklyrate': 0.3, 'mmwrid': 2513}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 40.7, 'weeklyrate': 0.5, 'mmwrid': 2516}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 41.3, 'weeklyrate': 0.1, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 3, 'rate': 12.8, 'weeklyrate': 4.8, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 3, 'rate': 36.0, 'weeklyrate': 0.1, 'mmwrid': 2513}, + ... 
+ ] +} diff --git a/src/acquisition/flusurv/reference/new_grasp_result.json b/src/acquisition/flusurv/reference/new_grasp_result.json new file mode 100644 index 000000000..78d82e21d --- /dev/null +++ b/src/acquisition/flusurv/reference/new_grasp_result.json @@ -0,0 +1,170 @@ +### The JSON format is invalid as-is. To be able to read these reference files +### into python, first remove the comments. +# +### New API response from https://gis.cdc.gov/GRASP/Flu3/PostPhase03DataTool?appVersion=Public +# +# with payload +# {"appversion": "Public", "key": "", "injson": []} +{ + # Last data update date + 'loaddatetime': 'Aug 26, 2023', + # Text appearing on dashboard + 'app_text': [ + { + 'description': 'SplashScreenDisclaimerText', + 'text': 'The Influenza Hospitalization Surveillance Network (FluSurv-NET) conducts population-based surveillance for laboratory-confirmed influenza-associated hospitalizations in children (persons younger than 18 years) and adults. The current network covers over 70 counties in the 10 Emerging Infection..." 
+ } + ], + # IDs for different data sources/networks + 'networks': [ + {'networkid': 1, 'name': 'FluSurv-NET'}, + {'networkid': 2, 'name': 'EIP'}, + {'networkid': 3, 'name': 'IHSP'} + ], + # IDs (network ID + catchment ID) specifying geos and data sources available + 'catchments': [ + {'networkid': 1, 'name': 'FluSurv-NET', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + + {'networkid': 2, 'name': 'EIP', 'area': 'California', 'catchmentid': '1', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Colorado', 'catchmentid': '2', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Connecticut', 'catchmentid': '3', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Georgia', 'catchmentid': '4', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Maryland', 'catchmentid': '7', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Minnesota', 'catchmentid': '9', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New Mexico', 'catchmentid': '11', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New York - Albany', 'catchmentid': '13', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New York - Rochester', 'catchmentid': '14', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Oregon', 'catchmentid': '17', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Tennessee', 'catchmentid': '20', 'beginseasonid': 43, 'endseasonid': 51}, + + {'networkid': 3, 'name': 'IHSP', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 
'area': 'Idaho', 'catchmentid': '6', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Iowa', 'catchmentid': '5', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Michigan', 'catchmentid': '8', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Ohio', 'catchmentid': '15', 'beginseasonid': 50, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Oklahoma', 'catchmentid': '16', 'beginseasonid': 49, 'endseasonid': 50}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Rhode Island', 'catchmentid': '18', 'beginseasonid': 50, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'South Dakota', 'catchmentid': '19', 'beginseasonid': 49, 'endseasonid': 49}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Utah', 'catchmentid': '21', 'beginseasonid': 50, 'endseasonid': 51} + ], + # Season labels, descriptions, IDs, and date ranges + 'seasons': [ + {'description': 'Season 2006-07', 'enabled': True, 'endweek': 2387, 'label': '2006-07', 'seasonid': 46, 'startweek': 2336, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2003-04', 'enabled': True, 'endweek': 2231, 'label': '2003-04', 'seasonid': 43, 'startweek': 2179, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2009-10', 'enabled': True, 'endweek': 2544, 'label': '2009-10', 'seasonid': 49, 'startweek': 2488, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2012-13', 'enabled': True, 'endweek': 2700, 'label': '2012-13', 'seasonid': 52, 'startweek': 2649, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2015-16', 'enabled': True, 'endweek': 2857, 'label': '2015-16', 'seasonid': 55, 'startweek': 2806, 'IncludeWeeklyRatesAndStrata': True}, + ... + ], + # Descriptions of data reporting groups (age, race, sex, and overall). Careful, some of these overlap. 
+ 'master_lookup': [ + {'Variable': 'Age', 'valueid': 1, 'parentid': 97, 'Label': '0-4 yr', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 2, 'parentid': 97, 'Label': '5-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 3, 'parentid': 98, 'Label': '18-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 4, 'parentid': 98, 'Label': '50-64 yr', 'Color_HexValue': '#516889', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 5, 'parentid': 98, 'Label': '65+ yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 7, 'parentid': 5, 'Label': '65-74 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 8, 'parentid': 5, 'Label': '75-84 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 9, 'parentid': 5, 'Label': '85+', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 10, 'parentid': 3, 'Label': '18-29 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 11, 'parentid': 3, 'Label': '30-39 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 12, 'parentid': 3, 'Label': '40-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 21, 'parentid': 2, 'Label': '5-11 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 22, 'parentid': 2, 'Label': '12-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 97, 'parentid': 0, 'Label': '< 18', 'Color_HexValue': '#000000', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 98, 'parentid': 0, 'Label': '>= 18', 'Color_HexValue': '#000000', 'Enabled': True}, + + {'Variable': 'Race', 'valueid': 1, 'parentid': None, 'Label': 'White', 'Color_HexValue': '#516889', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 2, 'parentid': None, 'Label': 'Black', 
'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 3, 'parentid': None, 'Label': 'Hispanic/Latino', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 4, 'parentid': None, 'Label': 'Asian/Pacific Islander', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 5, 'parentid': None, 'Label': 'American Indian/Alaska Native', 'Color_HexValue': '#007d8e', 'Enabled': True}, + + {'Variable': 'Sex', 'valueid': 1, 'parentid': None, 'Label': 'Male', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Sex', 'valueid': 2, 'parentid': None, 'Label': 'Female', 'Color_HexValue': '#F2775F', 'Enabled': True}, + + {'Variable': None, 'valueid': 0, 'parentid': 0, 'Label': 'Overall', 'Color_HexValue': '#000000', 'Enabled': True}, + ], + # Colors by ID, presumably used in dashboard. Appears to be only some of the age groups. Also doesn't agree with "master_lookup" above. + 'colors_qualitative': [ + {'id': 1, 'hex': '#a6cee3'}, + {'id': 2, 'hex': '#1f78b4'}, + {'id': 3, 'hex': '#b2df8a'}, + {'id': 4, 'hex': '#33a02c'}, + {'id': 5, 'hex': '#fb9a99'}, + {'id': 6, 'hex': '#e31a1c'}, + {'id': 7, 'hex': '#fdbf6f'}, + {'id': 8, 'hex': '#ff7f00'}, + {'id': 9, 'hex': '#cab2d6'}, + {'id': 10, 'hex': '#6a3d9a'}, + {'id': 12, 'hex': '#b15928'} + ], + # List of ageids available for each network+catchment combo + 'age_catchment_lookup': [ + {'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 1, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 2, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 3, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 4, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 5, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 7, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 8, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 9, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 10, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 11, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 12, 
'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 21, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 22, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 97, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 98, 'NetworkId': 1, 'CatchmentID': 22}, + {'ageID': 99, 'NetworkId': 1, 'CatchmentID': 22}, + {'NetworkId': 2, 'CatchmentID': 1}, + {'ageID': 1, 'NetworkId': 2, 'CatchmentID': 1}, + {'ageID': 2, 'NetworkId': 2, 'CatchmentID': 1}, + ... + ], + # List of seasonids available for each network+catchment combo + 'season_catchment_lookup': [ + {'seasonid': 49, 'NetworkId': 1, 'CatchmentID': 22}, + {'seasonid': 50, 'NetworkId': 1, 'CatchmentID': 22}, + {'seasonid': 51, 'NetworkId': 1, 'CatchmentID': 22}, + {'seasonid': 52, 'NetworkId': 1, 'CatchmentID': 22}, + ... + {'raceid': 4, 'NetworkId': 2, 'CatchmentID': 14}, + {'raceid': 5, 'NetworkId': 2, 'CatchmentID': 14}, + {'raceid': 99, 'NetworkId': 2, 'CatchmentID': 14}, + {'NetworkId': 2, 'CatchmentID': 17}, + ... + ], + # Data!! But only for the overall category. Format is list + # of dicts. Each obs' fields are labelled, so we could easily + # convert this to dataframe. 
+ 'default_data': [ + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 4.3, 'weeklyrate': 1.7, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.3, 'weeklyrate': 0.1, 'mmwrid': 2513}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.7, 'weeklyrate': 0.0, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 11.6, 'weeklyrate': 3.6, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 39.6, 'weeklyrate': 0.3, 'mmwrid': 2513}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 40.7, 'weeklyrate': 0.5, 'mmwrid': 2516}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'rate': 41.3, 'weeklyrate': 0.1, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 3, 'rate': 12.8, 'weeklyrate': 4.8, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 3, 'rate': 36.0, 'weeklyrate': 0.1, 'mmwrid': 2513}, + ... + ], + # Mapping each mmwrid to a week number, season, and date. Could use this instead of our current epoch-based function. 
+ 'mmwr': [ + ..., + {'mmwrid': 2828, 'weekend': '2016-03-12', 'weeknumber': 10, 'weekstart': '2016-03-06', 'year': 2016, 'yearweek': 201610, 'seasonid': 55, 'label': 'Mar-12-2016', 'weekendlabel': 'Mar 12, 2016', 'weekendlabel2': 'Mar-12-2016'}, + {'mmwrid': 2885, 'weekend': '2017-04-15', 'weeknumber': 15, 'weekstart': '2017-04-09', 'year': 2017, 'yearweek': 201715, 'seasonid': 56, 'label': 'Apr-15-2017', 'weekendlabel': 'Apr 15, 2017', 'weekendlabel2': 'Apr-15-2017'}, + {'mmwrid': 2911, 'weekend': '2017-10-14', 'weeknumber': 41, 'weekstart': '2017-10-08', 'year': 2017, 'yearweek': 201741, 'seasonid': 57, 'label': 'Oct-14-2017', 'weekendlabel': 'Oct 14, 2017', 'weekendlabel2': 'Oct-14-2017'}, + {'mmwrid': 2928, 'weekend': '2018-02-10', 'weeknumber': 6, 'weekstart': '2018-02-04', 'year': 2018, 'yearweek': 201806, 'seasonid': 57, 'label': 'Feb-10-2018', 'weekendlabel': 'Feb 10, 2018', 'weekendlabel2': 'Feb-10-2018'}, + {'mmwrid': 2974, 'weekend': '2018-12-29', 'weeknumber': 52, 'weekstart': '2018-12-23', 'year': 2018, 'yearweek': 201852, 'seasonid': 58, 'label': 'Dec-29-2018', 'weekendlabel': 'Dec 29, 2018', 'weekendlabel2': 'Dec-29-2018'}, + {'mmwrid': 3031, 'weekend': '2020-02-01', 'weeknumber': 5, 'weekstart': '2020-01-26', 'year': 2020, 'yearweek': 202005, 'seasonid': 59, 'label': 'Feb-01-2020', 'weekendlabel': 'Feb 01, 2020', 'weekendlabel2': 'Feb-01-2020'}, + {'mmwrid': 3037, 'weekend': '2020-03-14', 'weeknumber': 11, 'weekstart': '2020-03-08', 'year': 2020, 'yearweek': 202011, 'seasonid': 59, 'label': 'Mar-14-2020', 'weekendlabel': 'Mar 14, 2020', 'weekendlabel2': 'Mar-14-2020'}, + {'mmwrid': 3077, 'weekend': '2020-12-19', 'weeknumber': 51, 'weekstart': '2020-12-13', 'year': 2020, 'yearweek': 202051, 'seasonid': 60, 'label': 'Dec-19-2020', 'weekendlabel': 'Dec 19, 2020', 'weekendlabel2': 'Dec-19-2020'}, + {'mmwrid': 3140, 'weekend': '2022-03-05', 'weeknumber': 9, 'weekstart': '2022-02-27', 'year': 2022, 'yearweek': 202209, 'seasonid': 61, 'label': 
'Mar-05-2022', 'weekendlabel': 'Mar 05, 2022', 'weekendlabel2': 'Mar-05-2022'}, + {'mmwrid': 3183, 'weekend': '2022-12-31', 'weeknumber': 52, 'weekstart': '2022-12-25', 'year': 2022, 'yearweek': 202252, 'seasonid': 62, 'label': 'Dec-31-2022', 'weekendlabel': 'Dec 31, 2022', 'weekendlabel2': 'Dec-31-2022'}, + ] +} diff --git a/src/acquisition/flusurv/reference/old_grasp_location_result.json b/src/acquisition/flusurv/reference/old_grasp_location_result.json new file mode 100644 index 000000000..9a6189044 --- /dev/null +++ b/src/acquisition/flusurv/reference/old_grasp_location_result.json @@ -0,0 +1,46 @@ +### The JSON format is invalid as-is. To be able to read these reference files +### into python, first remove the comments. +# +### Old location-specific API response from https://gis.cdc.gov/GRASP/Flu3/PostPhase03GetData?appVersion=Public +# +# with payload +# { +# "appversion": "Public", +# "networkid": location_code[0], +# "cacthmentid": location_code[1], +# } +{ + 'busdata': { + 'datafields': ['mmwrid', 'weeknumber', 'rate', 'weeklyrate'], + "dataseries": [ + {'season': 55, 'age': 8, 'data': [[2806, 40, 0.7, 0.7], [2807, 41, 0.7, 0], [2808, 42, 1.4, 0.7], [2809, 43, 1.4, 0], [2810, 44, 2.1, 0.7], [2811, 45, 2.1, 0], [2812, 46, 2.1, 0], [2813, 47, 2.1, 0], [2814, 48, 2.8, 0.7], [2815, 49, 4.2, 1.4], [2816, 50, 4.2, 0], [2817, 51, 6.4, 2.1], [2818, 52, 8.5, 2.1], [2819, 1, 13.4, 4.9], [2820, 2, 17, 3.5], [2821, 3, 20.5, 3.5], [2822, 4, 26.2, 5.7], [2823, 5, 30.4, 4.2], [2824, 6, 40.3, 9.9], [2825, 7, 47.4, 7.1], [2826, 8, 53.7, 6.4], [2827, 9, 63.6, 9.9], [2828, 10, 74.9, 11.3], [2829, 11, 82, 7.1], [2830, 12, 91.2, 9.2], [2831, 13, 98.3, 7.1], [2832, 14, 103.9, 5.7], [2833, 15, 106.8, 2.8], [2834, 16, 109.6, 2.8], [2835, 17, 111.7, 2.1]]}, + {'season': 57, 'age': 2, 'data': [[2910, 40, 0, 0], [2911, 41, 0, 0], [2912, 42, 0, 0], [2913, 43, 0, 0], [2914, 44, 0.2, 0.2], [2915, 45, 0.4, 0.2], [2916, 46, 0.8, 0.4], [2917, 47, 0.8, 0], [2918, 48, 0.8, 0], [2919, 49, 
1.3, 0.6], [2920, 50, 1.7, 0.4], [2921, 51, 1.9, 0.2], [2922, 52, 3.1, 1.1], [2923, 1, 4.8, 1.7], [2924, 2, 6.9, 2.1], [2925, 3, 7.9, 1], [2926, 4, 9.2, 1.3], [2927, 5, 10.5, 1.3], [2928, 6, 11.3, 0.8], [2929, 7, 11.5, 0.2], [2930, 8, 12.6, 1.1], [2931, 9, 13.8, 1.1], [2932, 10, 15.1, 1.3], [2933, 11, 15.9, 0.8], [2934, 12, 16.3, 0.4], [2935, 13, 16.7, 0.4], [2936, 14, 16.9, 0.2], [2937, 15, 16.9, 0], [2938, 16, 16.9, 0], [2939, 17, 17, 0.2]]}, + ... + ] + }, + 'seasons': [ + {'description': 'Season 2003-04', 'endweek': 2231, 'label': '2003-04', 'seasonid': 43, 'startweek': 2179, 'color': 'Dark Purple', 'color_hexvalue': '#4A298B'}, + {'description': 'Season 2004-05', 'endweek': 2283, 'label': '2004-05', 'seasonid': 44, 'startweek': 2232, 'color': 'Brown', 'color_hexvalue': '#76522E'}, + {'description': 'Season 2005-06', 'endweek': 2335, 'label': '2005-06', 'seasonid': 45, 'startweek': 2284, 'color': 'Salmon', 'color_hexvalue': '#C76751'}, + {'description': 'Season 2006-07', 'endweek': 2387, 'label': '2006-07', 'seasonid': 46, 'startweek': 2336, 'color': 'Purple', 'color_hexvalue': '#B92CC6'}, + {'description': 'Season 2007-08', 'endweek': 2439, 'label': '2007-08', 'seasonid': 47, 'startweek': 2388, 'color': 'Blue', 'color_hexvalue': '#2A44C8'}, + {'description': 'Season 2008-09', 'endweek': 2487, 'label': '2008-09', 'seasonid': 48, 'startweek': 2440, 'color': 'Green', 'color_hexvalue': '#299A42'}, + {'description': 'Season 2009-10', 'endweek': 2544, 'label': '2009-10', 'seasonid': 49, 'startweek': 2488, 'color': 'Red', 'color_hexvalue': '#A12732'}, + {'description': 'Season 2010-11', 'endweek': 2596, 'label': '2010-11', 'seasonid': 50, 'startweek': 2545, 'color': 'Mustard', 'color_hexvalue': '#BDAA2A'}, + {'description': 'Season 2011-12', 'endweek': 2648, 'label': '2011-12', 'seasonid': 51, 'startweek': 2597, 'color': 'Light Blue', 'color_hexvalue': '#3289A4'}, + {'description': 'Season 2012-13', 'endweek': 2700, 'label': '2012-13', 'seasonid': 52, 'startweek': 
2649, 'color': 'Grey', 'color_hexvalue': '#5E5E5E'}, + {'description': 'Season 2013-14', 'endweek': 2752, 'label': '2013-14', 'seasonid': 53, 'startweek': 2701, 'color': 'Light Blue', 'color_hexvalue': '#42B5C8'}, + {'description': 'Season 2014-15', 'endweek': 2805, 'label': '2014-15', 'seasonid': 54, 'startweek': 2753, 'color': 'Mint', 'color_hexvalue': '#4EC87B'}, + {'description': 'Season 2015-16', 'endweek': 2857, 'label': '2015-16', 'seasonid': 55, 'startweek': 2806, 'color': 'Orange', 'color_hexvalue': '#C7852E'}, + {'description': 'Season 2016-17', 'endweek': 2909, 'label': '2016-17', 'seasonid': 56, 'startweek': 2858, 'color': 'Purple', 'color_hexvalue': '#7F42A9'}, + {'description': 'Season 2017-18', 'endweek': 2961, 'label': '2017-18', 'seasonid': 57, 'startweek': 2910, 'color': 'Lime', 'color_hexvalue': '#8AC73C'}, + {'description': 'Season 2018-19', 'endweek': 3013, 'label': '2018-19', 'seasonid': 58, 'startweek': 2962, 'color': 'Brown', 'color_hexvalue': '#A06C3A'}, + {'description': 'Season 2019-20', 'endweek': 3065, 'label': '2019-20', 'seasonid': 59, 'startweek': 3014, 'color': 'Light Orange', 'color_hexvalue': '#FFCF48'} + ], + 'mmwr': [ + {'mmwrid': 2179, 'weekend': '2003-10-04', 'weeknumber': 40, 'weekstart': '2003-09-28', 'year': 2003, 'yearweek': 200340, 'seasonid': 43, 'label': '40', 'weekendlabel': 'Oct 04, 2003', 'weekendlabel2': 'Oct-04-2003'}, + {'mmwrid': 2180, 'weekend': '2003-10-11', 'weeknumber': 41, 'weekstart': '2003-10-05', 'year': 2003, 'yearweek': 200341, 'seasonid': 43, 'label': '', 'weekendlabel': 'Oct 11, 2003', 'weekendlabel2': 'Oct-11-2003'}, + {'mmwrid': 2181, 'weekend': '2003-10-18', 'weeknumber': 42, 'weekstart': '2003-10-12', 'year': 2003, 'yearweek': 200342, 'seasonid': 43, 'label': '', 'weekendlabel': 'Oct 18, 2003', 'weekendlabel2': 'Oct-18-2003'}, + ... 
+ ] +} diff --git a/src/acquisition/flusurv/reference/old_grasp_result.json b/src/acquisition/flusurv/reference/old_grasp_result.json new file mode 100644 index 000000000..5b70e361a --- /dev/null +++ b/src/acquisition/flusurv/reference/old_grasp_result.json @@ -0,0 +1,92 @@ +### The JSON format is invalid as-is. To be able to read these reference files +### into python, first remove the comments. +# +### Old API response from https://gis.cdc.gov/GRASP/Flu3/GetPhase03InitApp?appVersion=Public +{ + 'loaddatetime': 'Mar 20, 2021', + 'disclaimer': { + 'id': 1, + 'splashscreensisclaimersext': 'The Influenza Hospitalization Surveillance Network (FluSurv-NET) conducts population-based surveillance for laboratory-confirmed influenza-associated hospitalizations in children (persons younger than 18 years) and adults. The current network covers over 70 counties in the 10 Emerging Infections Program (EIP) states (CA, CO, CT, GA, MD, MN, NM, NY, OR, and TN) and three additional states (MI, OH, and UT). The network represents approximately 8.5% of US population (~27 million people).

Cases are identified by reviewing hospital, laboratory, and admission databases and infection control logs for patients hospitalized during the influenza season with a documented positive influenza test (i.e., viral culture, direct/indirect fluorescent antibody assay (DFA/IFA), reverse transcription-polymerase chain reaction (RT-PCR), or a rapid influenza diagnostic test (RIDT)).

Data gathered are used to estimate age-specific hospitalization rates on a weekly basis, and describe characteristics of persons hospitalized with severe influenza illness. Laboratory-confirmation is dependent on clinician-ordered influenza testing. Therefore, the rates provided are likely to be underestimated as influenza-related hospitalizations can be missed, either because testing is not performed, or because cases may be attributed to other causes of pneumonia or other common influenza-related complications.

FluSurv-NET hospitalization data are preliminary and subject to change as more data become available. Please use the following citation when referencing these data: “FluView: Influenza Hospitalization Surveillance Network, Centers for Disease Control and Prevention. WEBSITE. Accessed on DATE”.', + 'pagedisclaimertext': "The Influenza Hospitalization Surveillance Network (FluSurv-NET) data are preliminary and subject to change as more data become available. \r\n All incidence rates are unadjusted. FluSurv-NET conducts population-based surveillance for laboratory-confirmed influenza associated \r\n hospitalizations in children <18 years of age (since 2003-2004 influenza season) and adults (since 2005-2006 influenza season). \r\n The FluSurv-NET covers over 70 counties in the 10 Emerging Infections Program (EIP) states (CA, CO, CT, GA, MD, MN, NM, NY, OR, TN) \r\n and additional Influenza Hospitalization Surveillance Project (IHSP) states. The IHSP began during the 2009-2010 season to enhance \r\n surveillance during the 2009 H1N1 pandemic. IHSP sites included IA, ID, MI, OK and SD during 2009-2010 season; ID, MI, OH, OK, RI, \r\n and UT during the 2010-2011 season; MI, OH, RI, and UT during the 2011-2012 season; IA, MI, OH, RI, and UT during the 2012-2013 season; and MI, OH, and UT during the 2013-2014 season and later. Cumulative and weekly unadjusted incidence rates (per 100,000 population) are calculated using the National Center for Health Statistics' (NCHS) \r\n population estimates for the counties included in the surveillance catchment area. The rates provided are likely to be underestimated as \r\n influenza-associated hospitalizations can be missed if influenza is not suspected and tested for.", + 'surveillancehelptext': 'The Emerging Infections Program (EIP) has conducted ongoing population-based influenza-hospitalization surveillance since the 2003-2004 season. EIP sites include counties within CA, CO, CT, GA, MD, MN, NM, NY, OR, TN. 
The Influenza Hospitalization Surveillance Project (IHSP) began during the 2009-2010 season to enhance surveillance during the 2009 H1N1 pandemic. IHSP sites included counties within IA, ID, MI, OK and SD during 2009-2010 season; ID, MI, OH, OK, RI, and UT during the 2010-2011 season; MI, OH, RI, and UT during the 2011-2012 season; IA, MI, OH, RI, and UT during the 2012-2013 season; and MI, OH, and UT during the 2013-2014, 2014-15 and 2015-2016 seasons. Together, the EIP and IHSP sites comprise the Influenza Hospitalization Surveillance Network (FluSurv-NET). The FluSurv-NET represents approximately 9% of the US population (~27 million people). FluSurv-NET hospitalization data are preliminary and subject to change as more data become available. All incidence rates are unadjusted.', + 'groupbyhelptext': 'Age Groups: During the 2003-2004 and 2004-2005 seasons, Emerging Infections Program (EIP) sites conducted population-based influenza-hospitalization surveillance for pediatric (persons younger than 18 years) cases only. Surveillance for adults began during the 2005-2006 season and all-age surveillance for laboratory-confirmed influenza associated hospitalizations has been ongoing since then. Seasons: Most calendar years can be divided into 52 epidemiologic weeks (MMWR weeks), but occasionally some years will have 53 weeks. For the Influenza Surveillance Network (FluSurv-Net), the influenza season is normally from October 1 through April 30 (MMWR Week 40-17). However, due to the onset of the influenza A (H1N1)pdm09 pandemic in spring of 2009, the 2008-2009 influenza season ended April 14, 2009 and the 2009-2010 season began September 1, 2009 (MMWR Week 35). The number of sites contributing to the influenza-hospitalization surveillance network has changed over the years. 
Please, refer to the Surveillance Area Help Button for more detailed information.', + 'downloaddatadisclaimertext': "The Influenza Hospitalization Surveillance Network (FluSurv-NET) data are preliminary and subject to change as more data become available. All incidence rates are unadjusted. FluSurv-NET conducts population-based surveillance for laboratory-confirmed influenza related hospitalizations in children <18 years of age (since 2003-2004 influenza season) and adults (since 2005-2006 influenza season). The FluSurv-NET covers over 70 counties in the 10 Emerging Infections Program (EIP) states (CA, CO, CT, GA, MD, MN, NM, NY, OR, TN) and additional Influenza Hospitalization Surveillance Project (IHSP) states. The IHSP began during the 2009-2010 season to enhance surveillance during the 2009 H1N1 pandemic. IHSP sites included IA, ID, MI, OK and SD during 2009-2010 season; ID, MI, OH, OK, RI, and UT during the 2010-2011 season; MI, OH, RI, and UT during the 2011-2012 season; IA, MI, OH, RI, and UT during the 2012-2013 season; and MI, OH, and UT during the 2013-2014 , 2014-2015 and 2015-2016 seasons. Cumulative and weekly unadjusted incidence rates (per 100,000 population) are calculated using the National Center for Health Statistics' (NCHS) population estimates for the counties included in the surveillance catchment area. The rates provided are likely to be underestimated as influenza-related hospitalizations can be missed, either because testing is not performed, or because cases may be attributed to other causes of pneumonia or other common influenza-related complications." 
+ }, + 'networks': [ + {'networkid': 1, 'name': 'FluSurv-NET'}, + {'networkid': 2, 'name': 'EIP'}, + {'networkid': 3, 'name': 'IHSP'} + ], + 'catchments': [ + {'networkid': 1, 'name': 'FluSurv-NET', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + + {'networkid': 2, 'name': 'EIP', 'area': 'California', 'catchmentid': '1', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Colorado', 'catchmentid': '2', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Connecticut', 'catchmentid': '3', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Georgia', 'catchmentid': '4', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Maryland', 'catchmentid': '7', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Minnesota', 'catchmentid': '9', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New Mexico', 'catchmentid': '11', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New York - Albany', 'catchmentid': '13', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New York - Rochester', 'catchmentid': '14', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Oregon', 'catchmentid': '17', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Tennessee', 'catchmentid': '20', 'beginseasonid': 43, 'endseasonid': 51}, + + {'networkid': 3, 'name': 'IHSP', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Idaho', 'catchmentid': '6', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Iowa', 
'catchmentid': '5', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Michigan', 'catchmentid': '8', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Ohio', 'catchmentid': '15', 'beginseasonid': 50, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Oklahoma', 'catchmentid': '16', 'beginseasonid': 49, 'endseasonid': 50}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Rhode Island', 'catchmentid': '18', 'beginseasonid': 50, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'South Dakota', 'catchmentid': '19', 'beginseasonid': 49, 'endseasonid': 49}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Utah', 'catchmentid': '21', 'beginseasonid': 50, 'endseasonid': 51} + ], + 'seasons': [ + {'description': 'Season 2009-10', 'endweek': 2544, 'label': '2009-10', 'seasonid': 49, 'startweek': 2488, 'color': 'Red', 'color_hexvalue': '#A12732'}, + {'description': 'Season 2010-11', 'endweek': 2596, 'label': '2010-11', 'seasonid': 50, 'startweek': 2545, 'color': 'Mustard', 'color_hexvalue': '#BDAA2A'}, + {'description': 'Season 2011-12', 'endweek': 2648, 'label': '2011-12', 'seasonid': 51, 'startweek': 2597, 'color': 'Light Blue', 'color_hexvalue': '#3289A4'}, + {'description': 'Season 2012-13', 'endweek': 2700, 'label': '2012-13', 'seasonid': 52, 'startweek': 2649, 'color': 'Grey', 'color_hexvalue': '#5E5E5E'}, + {'description': 'Season 2013-14', 'endweek': 2752, 'label': '2013-14', 'seasonid': 53, 'startweek': 2701, 'color': 'Light Blue', 'color_hexvalue': '#42B5C8'}, + {'description': 'Season 2014-15', 'endweek': 2805, 'label': '2014-15', 'seasonid': 54, 'startweek': 2753, 'color': 'Mint', 'color_hexvalue': '#4EC87B'}, + {'description': 'Season 2015-16', 'endweek': 2857, 'label': '2015-16', 'seasonid': 55, 'startweek': 2806, 'color': 'Orange', 'color_hexvalue': '#C7852E'}, + {'description': 'Season 2016-17', 'endweek': 2909, 'label': '2016-17', 'seasonid': 56, 'startweek': 2858, 'color': 
'Purple', 'color_hexvalue': '#7F42A9'}, + {'description': 'Season 2017-18', 'endweek': 2961, 'label': '2017-18', 'seasonid': 57, 'startweek': 2910, 'color': 'Lime', 'color_hexvalue': '#8AC73C'}, + {'description': 'Season 2018-19', 'endweek': 3013, 'label': '2018-19', 'seasonid': 58, 'startweek': 2962, 'color': 'Brown', 'color_hexvalue': '#A06C3A'}, + {'description': 'Season 2019-20', 'endweek': 3065, 'label': '2019-20', 'seasonid': 59, 'startweek': 3014, 'color': 'Light Orange', 'color_hexvalue': '#FFCF48'} + ], + 'ages': [ + {'label': '85+', 'ageid': 9, 'color_hexvalue': '#1f78b4'}, + {'label': '75-84 yr', 'ageid': 8, 'color_hexvalue': '#CAB2D6'}, + {'label': '65-74 yr', 'ageid': 7, 'color_hexvalue': '#A6CEE3'}, + {'label': 'Overall', 'ageid': 6, 'color_hexvalue': '#000000'}, + {'label': '65+ yr', 'ageid': 5, 'color_hexvalue': '#6AA61E'}, + {'label': '50-64 yr', 'ageid': 4, 'color_hexvalue': '#E7298A'}, + {'label': '18-49 yr', 'ageid': 3, 'color_hexvalue': '#4A298B'}, + {'label': '5-17 yr', 'ageid': 2, 'color_hexvalue': '#D95F02'}, + {'label': '0-4 yr', 'ageid': 1, 'color_hexvalue': '#1B9E77'} + ], + 'busdata': { + 'datafields': ['mmwrid', 'weeknumber', 'rate', 'weeklyrate'], + 'dataseries': [ + ..., + {'season': 56, 'age': 4, 'data': [[2858, 40, 0.1, 0.1], [2859, 41, 0.2, 0.1], [2860, 42, 0.3, 0.1], [2861, 43, 0.5, 0.1], [2862, 44, 0.6, 0.1], [2863, 45, 0.8, 0.2], [2864, 46, 1, 0.2], [2865, 47, 1.4, 0.4], [2866, 48, 1.7, 0.4], [2867, 49, 2.3, 0.5], [2868, 50, 3.3, 1.1], [2869, 51, 4.8, 1.5], [2870, 52, 7.8, 2.9], [2871, 1, 11.7, 4], [2872, 2, 15.7, 3.9], [2873, 3, 19.5, 3.8], [2874, 4, 22.9, 3.4], [2875, 5, 26.3, 3.4], [2876, 6, 31.1, 4.8], [2877, 7, 36.1, 5], [2878, 8, 41.5, 5.3], [2879, 9, 44.7, 3.2], [2880, 10, 48, 3.3], [2881, 11, 50.9, 2.9], [2882, 12, 54, 3.1], [2883, 13, 57.3, 3.3], [2884, 14, 59.3, 2], [2885, 15, 60.8, 1.5], [2886, 16, 62, 1.2], [2887, 17, 62.7, 0.8]]}, + {'season': 55, 'age': 8, 'data': [[2806, 40, 0.6, 0.6], [2807, 41, 1, 0.4], [2808, 
42, 1.3, 0.3], [2809, 43, 1.3, 0], [2810, 44, 1.8, 0.5], [2811, 45, 2.3, 0.5], [2812, 46, 2.4, 0.1], [2813, 47, 2.4, 0], [2814, 48, 2.8, 0.4], [2815, 49, 3.2, 0.4], [2816, 50, 4, 0.8], [2817, 51, 5.5, 1.6], [2818, 52, 6.4, 0.9], [2819, 1, 8, 1.6], [2820, 2, 9.6, 1.6], [2821, 3, 10.9, 1.3], [2822, 4, 12.6, 1.8], [2823, 5, 14.8, 2.2], [2824, 6, 19.6, 4.7], [2825, 7, 25.1, 5.5], [2826, 8, 30.9, 5.8], [2827, 9, 41.4, 10.5], [2828, 10, 54.2, 12.8], [2829, 11, 63.5, 9.3], [2830, 12, 73.9, 10.4], [2831, 13, 83.5, 9.6], [2832, 14, 90.3, 6.8], [2833, 15, 95.6, 5.3], [2834, 16, 100.7, 5.1], [2835, 17, 103.7, 3]]}, + {'season': 59, 'age': 11, 'data': [[3014, 40, 0, 0], [3015, 41, 0.1, 0], [3016, 42, 0.2, 0.1], [3017, 43, 0.2, 0], [3018, 44, 0.3, 0.1], [3019, 45, 0.5, 0.2], [3020, 46, 0.8, 0.3], [3021, 47, 1.1, 0.4], [3022, 48, 1.7, 0.5], [3023, 49, 2.5, 0.9], [3024, 50, 3.5, 1], [3025, 51, 4.8, 1.3], [3026, 52, 7.2, 2.4], [3027, 1, 10.2, 3], [3028, 2, 12.7, 2.5], [3029, 3, 14.7, 2], [3030, 4, 17.1, 2.3], [3031, 5, 19.9, 2.8], [3032, 6, 23.5, 3.6], [3033, 7, 25.7, 2.2], [3034, 8, 27.9, 2.2], [3035, 9, 30, 2], [3036, 10, 31.9, 1.9], [3037, 11, 33.2, 1.4], [3038, 12, 34.1, 0.8], [3039, 13, 34.2, 0.1], [3040, 14, 34.2, 0], [3041, 15, 34.2, 0], [3042, 16, 34.3, 0.1], [3043, 17, 34.3, 0]]}, + {'season': 50, 'age': 4, 'data': [[2545, 40, 0.1, 0.1], [2546, 41, 0.1, 0.1], [2547, 42, 0.2, 0.1], [2548, 43, 0.2, 0.1], [2549, 44, 0.3, 0.1], [2550, 45, 0.3, 0.1], [2551, 46, 0.6, 0.2], [2552, 47, 0.7, 0.2], [2553, 48, 0.9, 0.2], [2554, 49, 1.2, 0.3], [2555, 50, 1.5, 0.3], [2556, 51, 2, 0.5], [2557, 52, 2.7, 0.7], [2558, 1, 3.6, 0.9], [2559, 2, 4.6, 1], [2560, 3, 5.9, 1.2], [2561, 4, 7.3, 1.4], [2562, 5, 8.9, 1.6], [2563, 6, 10.5, 1.6], [2564, 7, 12.7, 2.2], [2565, 8, 15.2, 2.5], [2566, 9, 17.2, 2], [2567, 10, 18.7, 1.5], [2568, 11, 19.9, 1.3], [2569, 12, 20.7, 0.8], [2570, 13, 21.2, 0.5], [2571, 14, 21.5, 0.4], [2572, 15, 21.7, 0.2], [2573, 16, 21.8, 0.1], [2574, 17, 21.9, 0.1]]}, + 
{'season': 58, 'age': 1, 'data': [[2962, 40, 0.1, 0.1], [2963, 41, 0.3, 0.2], [2964, 42, 0.3, 0.1], [2965, 43, 0.5, 0.2], [2966, 44, 1, 0.5], [2967, 45, 1.4, 0.4], [2968, 46, 1.7, 0.3], [2969, 47, 2.3, 0.6], [2970, 48, 3.6, 1.3], [2971, 49, 5.5, 1.9], [2972, 50, 8.5, 3], [2973, 51, 12.3, 3.8], [2974, 52, 17.7, 5.4], [2975, 1, 21.7, 3.9], [2976, 2, 24.9, 3.2], [2977, 3, 27.4, 2.5], [2978, 4, 30.5, 3.1], [2979, 5, 34.5, 4.1], [2980, 6, 37.6, 3.1], [2981, 7, 41.6, 4], [2982, 8, 46.2, 4.6], [2983, 9, 50.2, 3.9], [2984, 10, 54.9, 4.7], [2985, 11, 59.2, 4.3], [2986, 12, 62.6, 3.4], [2987, 13, 65.3, 2.7], [2988, 14, 67.6, 2.3], [2989, 15, 68.9, 1.3], [2990, 16, 69.8, 0.9], [2991, 17, 70.9, 1]]}, + {'season': 52, 'age': 10, 'data': [[2649, 40, 0, 0], [2649, 40, 0, 0], [2650, 41, 0, 0], [2650, 41, 0, 0], [2651, 42, 0.1, 0], [2651, 42, 0, 0], [2652, 43, 0.1, 0], [2652, 43, 0, 0], [2653, 44, 0.1, 0], [2653, 44, 0, 0], [2654, 45, 0.2, 0.1], [2654, 45, 0.1, 0], [2655, 46, 0.5, 0.1], [2655, 46, 0.1, 0], [2656, 47, 0.8, 0.2], [2656, 47, 0.2, 0], [2657, 48, 1.3, 0.2], [2657, 48, 0.3, 0], [2658, 49, 2.3, 0.7], [2658, 49, 0.5, 0.1], [2659, 50, 3.7, 0.7], [2659, 50, 0.7, 0.1], [2660, 51, 5.3, 0.9], [2660, 51, 1.1, 0.2], [2661, 52, 7.4, 1.2], [2661, 52, 1.5, 0.2], [2662, 1, 9.9, 1.3], [2662, 1, 1.9, 0.2], [2663, 2, 12.2, 1.1], [2663, 2, 2.3, 0.2], [2664, 3, 14.4, 1], [2664, 3, 2.6, 0.2], [2665, 4, 16.3, 0.9], [2665, 4, 2.9, 0.2], [2666, 5, 17.8, 0.6], [2666, 5, 3.2, 0.1], [2667, 6, 19, 0.5], [2667, 6, 3.3, 0.1], [2668, 7, 20, 0.6], [2668, 7, 3.5, 0.1], [2669, 8, 21.1, 0.5], [2669, 8, 3.7, 0.1], [2670, 9, 22, 0.4], [2670, 9, 3.8, 0.1], [2671, 10, 22.8, 0.4], [2671, 10, 4, 0.1], [2672, 11, 23.4, 0.3], [2672, 11, 4.1, 0], [2673, 12, 23.8, 0.2], [2673, 12, 4.1, 0], [2674, 13, 24.2, 0.2], [2674, 13, 4.2, 0], [2675, 14, 24.6, 0.2], [2675, 14, 4.2, 0], [2676, 15, 24.9, 0.1], [2676, 15, 4.3, 0], [2677, 16, 25.1, 0.1], [2677, 16, 4.3, 0], [2678, 17, 25.2, 0.1], [2678, 17, 4.3, 0]]}, + 
{'season': 54, 'age': 7, 'data': [[2753, 40, 0.2, 0.2], [2754, 41, 0.4, 0.1], [2755, 42, 0.9, 0.5], [2756, 43, 1, 0.1], [2757, 44, 1.1, 0.1], [2758, 45, 1.6, 0.4], [2759, 46, 2, 0.5], [2760, 47, 2.9, 0.9], [2761, 48, 5.3, 2.3], [2762, 49, 9.9, 4.7], [2763, 50, 17.6, 7.7], [2764, 51, 30.1, 12.5], [2765, 52, 49.4, 19.3], [2766, 53, 69, 19.5], [2767, 1, 83.2, 14.2], [2768, 2, 91.4, 8.2], [2769, 3, 98.6, 7.3], [2770, 4, 104.6, 5.9], [2771, 5, 110.9, 6.3], [2772, 6, 116, 5.2], [2773, 7, 119.2, 3.1], [2774, 8, 122.3, 3.2], [2775, 9, 124.5, 2.1], [2776, 10, 127.5, 3], [2777, 11, 130.1, 2.7], [2778, 12, 132.4, 2.3], [2779, 13, 135, 2.6], [2780, 14, 137.7, 2.7], [2781, 15, 139.1, 1.4], [2782, 16, 140.3, 1.1], [2783, 17, 141, 0.8]]} + ] + }, + 'mmwr': [ + ..., + {'mmwrid': 3038, 'weekend': '2020-03-21', 'weeknumber': 12, 'weekstart': '2020-03-15', 'year': 2020, 'yearweek': 202012, 'seasonid': 59, 'label': '', 'weekendlabel': 'Mar 21, 2020', 'weekendlabel2': 'Mar-21-2020'}, + {'mmwrid': 3039, 'weekend': '2020-03-28', 'weeknumber': 13, 'weekstart': '2020-03-22', 'year': 2020, 'yearweek': 202013, 'seasonid': 59, 'label': '', 'weekendlabel': 'Mar 28, 2020', 'weekendlabel2': 'Mar-28-2020'}, + {'mmwrid': 3040, 'weekend': '2020-04-04', 'weeknumber': 14, 'weekstart': '2020-03-29', 'year': 2020, 'yearweek': 202014, 'seasonid': 59, 'label': '', 'weekendlabel': 'Apr 04, 2020', 'weekendlabel2': 'Apr-04-2020'}, + {'mmwrid': 3041, 'weekend': '2020-04-11', 'weeknumber': 15, 'weekstart': '2020-04-05', 'year': 2020, 'yearweek': 202015, 'seasonid': 59, 'label': '', 'weekendlabel': 'Apr 11, 2020', 'weekendlabel2': 'Apr-11-2020'}, + {'mmwrid': 3042, 'weekend': '2020-04-18', 'weeknumber': 16, 'weekstart': '2020-04-12', 'year': 2020, 'yearweek': 202016, 'seasonid': 59, 'label': '', 'weekendlabel': 'Apr 18, 2020', 'weekendlabel2': 'Apr-18-2020'}, + {'mmwrid': 3043, 'weekend': '2020-04-25', 'weeknumber': 17, 'weekstart': '2020-04-19', 'year': 2020, 'yearweek': 202017, 'seasonid': 59, 'label': '17', 
'weekendlabel': 'Apr 25, 2020', 'weekendlabel2': 'Apr-25-2020'} + ] +} diff --git a/src/ddl/fluview.sql b/src/ddl/fluview.sql index 11f10c9dc..31cf26fbe 100644 --- a/src/ddl/fluview.sql +++ b/src/ddl/fluview.sql @@ -329,7 +329,7 @@ CREATE TABLE `fluview_public` ( ) ENGINE=InnoDB DEFAULT CHARSET=utf8; /* -`flusurv` stores FluSurv-NET data (flu hospitaliation rates) as published by +`flusurv` stores FluSurv-NET data (flu hospitalization rates) as published by CDC. Data is public. @@ -345,25 +345,58 @@ Note that the flusurv age groups are, in general, not the same as the ILINet particular "catchment" (e.g. 'network_all', 'CA', 'NY_albany') rather than by regions and states in general. -+--------------+-------------+------+-----+---------+----------------+ -| Field | Type | Null | Key | Default | Extra | -+--------------+-------------+------+-----+---------+----------------+ -| id | int(11) | NO | PRI | NULL | auto_increment | -| release_date | date | NO | MUL | NULL | | -| issue | int(11) | NO | MUL | NULL | | -| epiweek | int(11) | NO | MUL | NULL | | -| location | varchar(32) | NO | MUL | NULL | | -| lag | int(11) | NO | MUL | NULL | | -| rate_age_0 | double | YES | | NULL | | -| rate_age_1 | double | YES | | NULL | | -| rate_age_2 | double | YES | | NULL | | -| rate_age_3 | double | YES | | NULL | | -| rate_age_4 | double | YES | | NULL | | -| rate_overall | double | YES | | NULL | | -| rate_age_5 | double | YES | | NULL | | -| rate_age_6 | double | YES | | NULL | | -| rate_age_7 | double | YES | | NULL | | -+--------------+-------------+------+-----+---------+----------------+ ++-------------------+-------------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++-------------------+-------------+------+-----+---------+----------------+ +| id | int(11) | NO | PRI | NULL | auto_increment | +| release_date | date | NO | MUL | NULL | | +| issue | int(11) | NO | MUL | NULL | | +| epiweek | int(11) | NO | MUL | NULL | | +| 
location | varchar(32) | NO | MUL | NULL | | +| lag | int(11) | NO | MUL | NULL | | +| rate_age_0 | double | YES | | NULL | | +| rate_age_1 | double | YES | | NULL | | +| rate_age_2 | double | YES | | NULL | | +| rate_age_3 | double | YES | | NULL | | +| rate_age_4 | double | YES | | NULL | | +| rate_overall | double | YES | | NULL | | +| rate_age_5 | double | YES | | NULL | | +| rate_age_6 | double | YES | | NULL | | +| rate_age_7 | double | YES | | NULL | | +| rate_age_18t29 | double | YES | | NULL | | +| rate_age_30t39 | double | YES | | NULL | | +| rate_age_40t49 | double | YES | | NULL | | +| rate_age_5t11 | double | YES | | NULL | | +| rate_age_12t17 | double | YES | | NULL | | +| rate_age_lt18 | double | YES | | NULL | | +| rate_age_gte18 | double | YES | | NULL | | +| rate_race_white | double | YES | | NULL | | +| rate_race_black | double | YES | | NULL | | +| rate_race_hisp | double | YES | | NULL | | +| rate_race_asian | double | YES | | NULL | | +| rate_race_natamer | double | YES | | NULL | | +| rate_sex_male | double | YES | | NULL | | +| rate_sex_female | double | YES | | NULL | | +| season | char(7) | YES | | NULL | | ++-------------------+-------------+------+-----+---------+----------------+ + +id: unique identifier for each record +release_date: the date when this record was first received by Delphi +issue: the epiweek of receipt by Delphi (e.g. issue 201453 includes epiweeks up to + and including 2014w53, but not 2015w01 or following) +epiweek: the epiweek during which the data was collected +location: the name of the catchment (e.g. 
'network_all', 'CA', 'NY_albany') +lag: number of weeks between `epiweek` and `issue` +rate_age_0: hospitalization rate for ages 0-4 +rate_age_1: hospitalization rate for ages 5-17 +rate_age_2: hospitalization rate for ages 18-49 +rate_age_3: hospitalization rate for ages 50-64 +rate_age_4: hospitalization rate for ages 65+ +rate_overall: overall hospitalization rate +rate_age_5: hospitalization rate for ages 65-74 +rate_age_6: hospitalization rate for ages 75-84 +rate_age_7: hospitalization rate for ages 85+ + */ CREATE TABLE `flusurv` ( @@ -373,6 +406,7 @@ CREATE TABLE `flusurv` ( `epiweek` int(11) NOT NULL, `location` varchar(32) NOT NULL, `lag` int(11) NOT NULL, + `season` char(7) DEFAULT NULL, `rate_age_0` double DEFAULT NULL, `rate_age_1` double DEFAULT NULL, `rate_age_2` double DEFAULT NULL, @@ -382,6 +416,25 @@ CREATE TABLE `flusurv` ( `rate_age_5` double DEFAULT NULL, `rate_age_6` double DEFAULT NULL, `rate_age_7` double DEFAULT NULL, + `rate_age_18t29` double DEFAULT NULL, + `rate_age_30t39` double DEFAULT NULL, + `rate_age_40t49` double DEFAULT NULL, + `rate_age_5t11` double DEFAULT NULL, + `rate_age_12t17` double DEFAULT NULL, + `rate_age_lt18` double DEFAULT NULL, + `rate_age_gte18` double DEFAULT NULL, + `rate_age_1t4` double DEFAULT NULL, + `rate_age_gte75` double DEFAULT NULL, + `rate_age_0tlt1` double DEFAULT NULL, + `rate_race_white` double DEFAULT NULL, + `rate_race_black` double DEFAULT NULL, + `rate_race_hisp` double DEFAULT NULL, + `rate_race_asian` double DEFAULT NULL, + `rate_race_natamer` double DEFAULT NULL, + `rate_sex_male` double DEFAULT NULL, + `rate_sex_female` double DEFAULT NULL, + `rate_flu_a` double DEFAULT NULL, + `rate_flu_b` double DEFAULT NULL, PRIMARY KEY (`id`), UNIQUE KEY `issue` (`issue`,`epiweek`,`location`), KEY `release_date` (`release_date`), diff --git a/src/ddl/migrations/flusurv_age_sex_race_strata.sql b/src/ddl/migrations/flusurv_age_sex_race_strata.sql new file mode 100644 index 000000000..88ff37c24 --- /dev/null 
+++ b/src/ddl/migrations/flusurv_age_sex_race_strata.sql @@ -0,0 +1,23 @@ +-- Add new age, race, and sex strata, and season descriptor (YYYY-YY format) +ALTER TABLE `flusurv` ADD ( + `rate_age_18t29` double DEFAULT NULL, + `rate_age_30t39` double DEFAULT NULL, + `rate_age_40t49` double DEFAULT NULL, + `rate_age_5t11` double DEFAULT NULL, + `rate_age_12t17` double DEFAULT NULL, + `rate_age_lt18` double DEFAULT NULL, + `rate_age_gte18` double DEFAULT NULL, + `rate_race_white` double DEFAULT NULL, + `rate_race_black` double DEFAULT NULL, + `rate_race_hisp` double DEFAULT NULL, + `rate_race_asian` double DEFAULT NULL, + `rate_race_natamer` double DEFAULT NULL, + `rate_sex_male` double DEFAULT NULL, + `rate_sex_female` double DEFAULT NULL, + `rate_age_0tlt1` double DEFAULT NULL, + `rate_age_1t4` double DEFAULT NULL, + `rate_age_gte75` double DEFAULT NULL, + `rate_flu_a` double DEFAULT NULL, + `rate_flu_b` double DEFAULT NULL, + `season` char(7) DEFAULT NULL +); diff --git a/src/ddl/migrations/flusurv_new_signals.sql b/src/ddl/migrations/flusurv_new_signals.sql new file mode 100644 index 000000000..f9cd80d1d --- /dev/null +++ b/src/ddl/migrations/flusurv_new_signals.sql @@ -0,0 +1,23 @@ +ALTER TABLE flusurv ADD ( + `season` char(7) DEFAULT NULL, + + `rate_age_18t29` double DEFAULT NULL, + `rate_age_30t39` double DEFAULT NULL, + `rate_age_40t49` double DEFAULT NULL, + `rate_age_5t11` double DEFAULT NULL, + `rate_age_12t17` double DEFAULT NULL, + `rate_age_lt18` double DEFAULT NULL, + `rate_age_gte18` double DEFAULT NULL, + `rate_age_1t4` double DEFAULT NULL, + `rate_age_gte75` double DEFAULT NULL, + `rate_age_0tlt1` double DEFAULT NULL, + `rate_race_white` double DEFAULT NULL, + `rate_race_black` double DEFAULT NULL, + `rate_race_hisp` double DEFAULT NULL, + `rate_race_asian` double DEFAULT NULL, + `rate_race_natamer` double DEFAULT NULL, + `rate_sex_male` double DEFAULT NULL, + `rate_sex_female` double DEFAULT NULL, + `rate_flu_a` double DEFAULT NULL, + `rate_flu_b` 
double DEFAULT NULL +); diff --git a/src/server/endpoints/flusurv.py b/src/server/endpoints/flusurv.py index 08b2a14d9..283d359a7 100644 --- a/src/server/endpoints/flusurv.py +++ b/src/server/endpoints/flusurv.py @@ -19,7 +19,7 @@ def handle(): # basic query info q = QueryBuilder("flusurv", "fs") - fields_string = ["release_date", "location"] + fields_string = ["release_date", "location", "season"] fields_int = ["issue", "epiweek", "lag"] fields_float = [ "rate_age_0", @@ -28,6 +28,31 @@ def handle(): "rate_age_3", "rate_age_4", "rate_overall", + "rate_age_5", + "rate_age_6", + "rate_age_7", + "rate_age_18t29", + "rate_age_30t39", + "rate_age_40t49", + "rate_age_5t11", + "rate_age_12t17", + "rate_age_lt18", + "rate_age_gte18", + "rate_age_1t4", + "rate_age_gte75", + "rate_age_0tlt1", + + "rate_race_white", + "rate_race_black", + "rate_race_hisp", + "rate_race_asian", + "rate_race_natamer", + + "rate_sex_male", + "rate_sex_female", + + "rate_flu_a", + "rate_flu_b" ] q.set_fields(fields_string, fields_int, fields_float) q.set_sort_order("epiweek", "location", "issue") diff --git a/tests/acquisition/flusurv/test_flusurv.py b/tests/acquisition/flusurv/test_flusurv.py index e0c5acaad..18eb1aa33 100644 --- a/tests/acquisition/flusurv/test_flusurv.py +++ b/tests/acquisition/flusurv/test_flusurv.py @@ -2,13 +2,217 @@ # standard library import unittest -from unittest.mock import MagicMock -from unittest.mock import sentinel +from collections import defaultdict +from unittest.mock import (MagicMock, sentinel, patch) -from delphi.epidata.acquisition.flusurv.flusurv import fetch_json +import delphi.epidata.acquisition.flusurv.api as flusurv # py3tester coverage target -__test_target__ = "delphi.epidata.acquisition.flusurv.flusurv" +__test_target__ = "delphi.epidata.acquisition.flusurv.api" + + +network_all_example_data = [ + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'flutype': 0, 'rate': 20.7, 'weeklyrate': 0.0, 'mmwrid': 2519}, + 
{'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'flutype': 0, 'rate': 41.3, 'weeklyrate': 0.1, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 1, 'sexid': 0, 'raceid': 0, 'flutype': 0, 'rate': 42, 'weeklyrate': 0.5, 'mmwrid': 2519}, + + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 2, 'raceid': 0, 'flutype': 0, 'rate': 1, 'weeklyrate': 0.1, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 14, 'sexid': 0, 'raceid': 0, 'flutype': 0, 'rate': 2, 'weeklyrate': 0.2, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 8, 'sexid': 0, 'raceid': 0, 'flutype': 0, 'rate': 3, 'weeklyrate': 0.3, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 22, 'sexid': 0, 'raceid': 0, 'flutype': 0, 'rate': 4, 'weeklyrate': 0.4, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 0, 'flutype': 1, 'rate': 5, 'weeklyrate': 0.5, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 0, 'flutype': 2, 'rate': 6, 'weeklyrate': 0.6, 'mmwrid': 2519}, + + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'flutype': 0, 'rate': 4.3, 'weeklyrate': 1.7, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'flutype': 0, 'rate': 11.6, 'weeklyrate': 3.6, 'mmwrid': 2493}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 3, 'flutype': 0, 'rate': 12.8, 'weeklyrate': 4.8, 'mmwrid': 2493}, + + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'flutype': 0, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'flutype': 0, 'rate': 40.7, 'weeklyrate': 0.5, 'mmwrid': 2516}, 
+ + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'flutype': 0, 'rate': 20.3, 'weeklyrate': 0.1, 'mmwrid': 2513}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 2, 'flutype': 0, 'rate': 39.6, 'weeklyrate': 0.3, 'mmwrid': 2513}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 3, 'flutype': 0, 'rate': 36.0, 'weeklyrate': 0.1, 'mmwrid': 2513}, + ] + +by_epiweek_example_data = { + 201014: {"rate_race_white": 0.0, "rate_race_black": 0.1, "rate_age_0": 0.5, "rate_sex_female": 0.1, + "rate_age_1t4": 0.2, "rate_age_6": 0.3, "rate_age_12t17": 0.4, "rate_flu_a": 0.5, "rate_flu_b": 0.6, + "season": "2009-10"}, + 200940: {"rate_race_white": 1.7, "rate_race_black": 3.6, "rate_race_hisp": 4.8, "season": "2009-10"}, + 201011: {"rate_race_white": 0.1, "rate_race_black": 0.5, "season": "2009-10"}, + 201008: {"rate_race_white": 0.1, "rate_race_black": 0.3, "rate_race_hisp": 0.1, "season": "2009-10"}, +} + +metadata_result = { + # Last data update date + 'loaddatetime': 'Sep 12, 2023', + # IDs (network ID + catchment ID) specifying geos and data sources available + 'catchments': [ + {'networkid': 1, 'name': 'FluSurv-NET', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + + {'networkid': 2, 'name': 'EIP', 'area': 'California', 'catchmentid': '1', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Colorado', 'catchmentid': '2', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Connecticut', 'catchmentid': '3', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Georgia', 'catchmentid': '4', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Maryland', 'catchmentid': '7', 
'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Minnesota', 'catchmentid': '9', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New Mexico', 'catchmentid': '11', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New York - Albany', 'catchmentid': '13', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'New York - Rochester', 'catchmentid': '14', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Oregon', 'catchmentid': '17', 'beginseasonid': 43, 'endseasonid': 51}, + {'networkid': 2, 'name': 'EIP', 'area': 'Tennessee', 'catchmentid': '20', 'beginseasonid': 43, 'endseasonid': 51}, + + {'networkid': 3, 'name': 'IHSP', 'area': 'Entire Network', 'catchmentid': '22', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Idaho', 'catchmentid': '6', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Iowa', 'catchmentid': '5', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Michigan', 'catchmentid': '8', 'beginseasonid': 49, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Ohio', 'catchmentid': '15', 'beginseasonid': 50, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Oklahoma', 'catchmentid': '16', 'beginseasonid': 49, 'endseasonid': 50}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Rhode Island', 'catchmentid': '18', 'beginseasonid': 50, 'endseasonid': 51}, + {'networkid': 3, 'name': 'IHSP', 'area': 'South Dakota', 'catchmentid': '19', 'beginseasonid': 49, 'endseasonid': 49}, + {'networkid': 3, 'name': 'IHSP', 'area': 'Utah', 'catchmentid': '21', 'beginseasonid': 50, 'endseasonid': 51} + ], + # "seasons" element, used for mapping between seasonids and season year spans. 
+ 'seasons': [ + {'description': 'Season 2003-04', 'enabled': True, 'endweek': 2231, 'label': '2003-04', 'seasonid': 43, 'startweek': 2179, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2006-07', 'enabled': True, 'endweek': 2387, 'label': '2006-07', 'seasonid': 46, 'startweek': 2336, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2009-10', 'enabled': True, 'endweek': 2544, 'label': '2009-10', 'seasonid': 49, 'startweek': 2488, 'IncludeWeeklyRatesAndStrata': True}, + {'description': 'Season 2021-22', 'enabled': True, 'endweek': 3170, 'label': '2021-22', 'seasonid': 61, 'startweek': 3119, 'IncludeWeeklyRatesAndStrata': False}, + {'description': 'Season 2022-23', 'enabled': True, 'endweek': 3222, 'label': '2022-23', 'seasonid': 62, 'startweek': 3171, 'IncludeWeeklyRatesAndStrata': False}, + # sic + {'description': 'Season 2023-24 ', 'enabled': True, 'endweek': 3274, 'label': '2023-24', 'seasonid': 63, 'startweek': 3223, 'IncludeWeeklyRatesAndStrata': False}, + ], + # "master_lookup" element, used for mapping between valueids and strata descriptions + 'master_lookup': [ + {'Variable': 'Age', 'valueid': 1, 'parentid': 97, 'Label': '0-4 yr', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 2, 'parentid': 97, 'Label': '5-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 3, 'parentid': 98, 'Label': '18-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 4, 'parentid': 98, 'Label': '50-64 yr', 'Color_HexValue': '#516889', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 5, 'parentid': 98, 'Label': '>= 65 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 7, 'parentid': 5, 'Label': '65-74 yr', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 8, 'parentid': 15, 'Label': '75-84 yr', 'Color_HexValue': '#b2df8a', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 9, 
'parentid': 15, 'Label': '>= 85', 'Color_HexValue': '#b2df8a', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 10, 'parentid': 3, 'Label': '18-29 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 11, 'parentid': 3, 'Label': '30-39 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 12, 'parentid': 3, 'Label': '40-49 yr', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 13, 'parentid': 1, 'Label': '0-< 1 yr', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 14, 'parentid': 1, 'Label': '1-4 yr', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 15, 'parentid': 5, 'Label': '>= 75', 'Color_HexValue': '#b2df8a', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 21, 'parentid': 2, 'Label': '5-11 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 22, 'parentid': 2, 'Label': '12-17 yr', 'Color_HexValue': '#707070', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 97, 'parentid': 0, 'Label': '< 18', 'Color_HexValue': '#000000', 'Enabled': True}, + {'Variable': 'Age', 'valueid': 98, 'parentid': 0, 'Label': '>= 18', 'Color_HexValue': '#000000', 'Enabled': True}, + + {'Variable': 'Race', 'valueid': 1, 'parentid': None, 'Label': 'White', 'Color_HexValue': '#516889', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 2, 'parentid': None, 'Label': 'Black', 'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 3, 'parentid': None, 'Label': 'Hispanic/Latino', 'Color_HexValue': '#d19833', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 4, 'parentid': None, 'Label': 'Asian/Pacific Islander', 'Color_HexValue': '#cc5e56', 'Enabled': True}, + {'Variable': 'Race', 'valueid': 5, 'parentid': None, 'Label': 'American Indian/Alaska Native', 'Color_HexValue': '#007d8e', 'Enabled': True}, + + {'Variable': 'Sex', 'valueid': 1, 'parentid': None, 'Label': 'Male', 
'Color_HexValue': '#44b3c6', 'Enabled': True}, + {'Variable': 'Sex', 'valueid': 2, 'parentid': None, 'Label': 'Female', 'Color_HexValue': '#F2775F', 'Enabled': True}, + + {'Variable': 'Flutype', 'valueid': 1, 'parentid': None, 'Label': 'Influenza A', 'Color_HexValue': '#FF0000', 'Enabled': True}, + {'Variable': 'Flutype', 'valueid': 2, 'parentid': None, 'Label': 'Influenza B', 'Color_HexValue': '#0000FF', 'Enabled': True}, + + {'Variable': None, 'valueid': 0, 'parentid': 0, 'Label': 'Overall', 'Color_HexValue': '#000000', 'Enabled': True}, + ], + 'default_data': network_all_example_data, + # Mapping each mmwrid to a week number, season, and date. Could use this instead of our current epoch-based function. + 'mmwr': [ + {'mmwrid': 2828, 'weekend': '2016-03-12', 'weeknumber': 10, 'weekstart': '2016-03-06', 'year': 2016, 'yearweek': 201610, 'seasonid': 55, 'label': 'Mar-12-2016', 'weekendlabel': 'Mar 12, 2016', 'weekendlabel2': 'Mar-12-2016'}, + {'mmwrid': 2885, 'weekend': '2017-04-15', 'weeknumber': 15, 'weekstart': '2017-04-09', 'year': 2017, 'yearweek': 201715, 'seasonid': 56, 'label': 'Apr-15-2017', 'weekendlabel': 'Apr 15, 2017', 'weekendlabel2': 'Apr-15-2017'}, + {'mmwrid': 2911, 'weekend': '2017-10-14', 'weeknumber': 41, 'weekstart': '2017-10-08', 'year': 2017, 'yearweek': 201741, 'seasonid': 57, 'label': 'Oct-14-2017', 'weekendlabel': 'Oct 14, 2017', 'weekendlabel2': 'Oct-14-2017'}, + {'mmwrid': 2928, 'weekend': '2018-02-10', 'weeknumber': 6, 'weekstart': '2018-02-04', 'year': 2018, 'yearweek': 201806, 'seasonid': 57, 'label': 'Feb-10-2018', 'weekendlabel': 'Feb 10, 2018', 'weekendlabel2': 'Feb-10-2018'}, + {'mmwrid': 2974, 'weekend': '2018-12-29', 'weeknumber': 52, 'weekstart': '2018-12-23', 'year': 2018, 'yearweek': 201852, 'seasonid': 58, 'label': 'Dec-29-2018', 'weekendlabel': 'Dec 29, 2018', 'weekendlabel2': 'Dec-29-2018'}, + {'mmwrid': 3031, 'weekend': '2020-02-01', 'weeknumber': 5, 'weekstart': '2020-01-26', 'year': 2020, 'yearweek': 202005, 'seasonid': 
59, 'label': 'Feb-01-2020', 'weekendlabel': 'Feb 01, 2020', 'weekendlabel2': 'Feb-01-2020'}, + {'mmwrid': 3037, 'weekend': '2020-03-14', 'weeknumber': 11, 'weekstart': '2020-03-08', 'year': 2020, 'yearweek': 202011, 'seasonid': 59, 'label': 'Mar-14-2020', 'weekendlabel': 'Mar 14, 2020', 'weekendlabel2': 'Mar-14-2020'}, + {'mmwrid': 3077, 'weekend': '2020-12-19', 'weeknumber': 51, 'weekstart': '2020-12-13', 'year': 2020, 'yearweek': 202051, 'seasonid': 60, 'label': 'Dec-19-2020', 'weekendlabel': 'Dec 19, 2020', 'weekendlabel2': 'Dec-19-2020'}, + {'mmwrid': 3140, 'weekend': '2022-03-05', 'weeknumber': 9, 'weekstart': '2022-02-27', 'year': 2022, 'yearweek': 202209, 'seasonid': 61, 'label': 'Mar-05-2022', 'weekendlabel': 'Mar 05, 2022', 'weekendlabel2': 'Mar-05-2022'}, + {'mmwrid': 3183, 'weekend': '2022-12-31', 'weeknumber': 52, 'weekstart': '2022-12-25', 'year': 2022, 'yearweek': 202252, 'seasonid': 62, 'label': 'Dec-31-2022', 'weekendlabel': 'Dec 31, 2022', 'weekendlabel2': 'Dec-31-2022'}, + ] +} + +# Example location-specific return JSON from CDC GRASP API. Contains +# partial data for "network_all" location and season 49. +location_api_result = {'default_data': network_all_example_data} + + +# Map derived from "master_lookup" dictionary above, mapping between valueids +# by type and cleaned-up descriptions (no spaces or capital letters, etc) +id_group_map = { + "Age": { + 1: "0", + 2: "1", + 3: "2", + 4: "3", + 5: "4", + 7: "5", + 8: "6", + 9: "7", + 10: "18t29", + 11: "30t39", + 12: "40t49", + 13: "0tlt1", + 14: "1t4", + 15: "gte75", + 21: "5t11", + 22: "12t17", + 97: "lt18", + 98: "gte18", + }, + "Race": { + 1: "white", + 2: "black", + 3: "hisp", + 4: "asian", + 5: "natamer", + }, + "Sex": { + 1: "male", + 2: "female", + }, + "Flutype": { + 1: "a", + 2: "b", + }, + # Unused. Leaving here for documentation's sake. 
+ "Overall": { + 0: "overall", + }, +} + +catchment_name_map = { + "CA": (2, 1), + "CO": (2, 2), + "CT": (2, 3), + "GA": (2, 4), + "IA": (3, 5), + "ID": (3, 6), + "MD": (2, 7), + "MI": (3, 8), + "MN": (2, 9), + "NM": (2, 11), + "NY_albany": (2, 13), + "NY_rochester": (2, 14), + "OH": (3, 15), + "OK": (3, 16), + "OR": (2, 17), + "RI": (3, 18), + "SD": (3, 19), + "TN": (2, 20), + "UT": (3, 21), + "network_all": (1, 22), + "network_eip": (2, 22), + "network_ihsp": (3, 22), +} + +with patch(__test_target__ + ".fetch_json", + return_value = metadata_result) as MockFlusurvMetadata: + metadata_fetcher = flusurv.FlusurvMetadata(52) + api_fetcher = flusurv.FlusurvLocationFetcher(52) class FunctionTests(unittest.TestCase): @@ -28,6 +232,157 @@ def test_fetch_json(self): requests_impl = MagicMock() requests_impl.get.return_value = response_object - actual = fetch_json(path, payload, requests_impl=requests_impl) + actual = flusurv.fetch_json(path, payload, requests_impl=requests_impl) self.assertEqual(actual, sentinel.expected) + + def test_mmwrid_to_epiweek(self): + # Test epoch + self.assertEqual(flusurv.mmwrid_to_epiweek(2179), 200340) + + for mmwr in metadata_result["mmwr"]: + self.assertEqual(flusurv.mmwrid_to_epiweek(mmwr["mmwrid"]), mmwr["yearweek"]) + + def test_metadata_attributes(self): + self.assertEqual(metadata_fetcher.metadata, metadata_result) + self.assertEqual(metadata_fetcher.issue, 202337) + self.assertEqual(metadata_fetcher.max_age_weeks, 52) + self.assertEqual(metadata_fetcher.seasonids, {61, 62, 63}) + + self.assertEqual(metadata_fetcher.location_to_code, catchment_name_map) + self.assertEqual(metadata_fetcher.locations, catchment_name_map.keys()) + + self.assertEqual(metadata_fetcher.id_to_group, id_group_map) + self.assertEqual(metadata_fetcher.id_to_season, { + 43: '2003-04', + 46: '2006-07', + 49: '2009-10', + 61: '2021-22', + 62: '2022-23', + 63: '2023-24', + }) + + def test_geo_name_conversion(self): + geos = ( + "California", + "Utah", + "Entire
Network", + "Entire Network", + "Entire Network", + "New York - Albany", + "New York - Rochester", + ) + networks = ( + "FluSurv-NET", + "FluSurv-NET", + "FluSurv-NET", + "IHSP", + "EIP", + "FluSurv-NET", + "FluSurv-NET", + ) + expected_list = [ + "CA", + "UT", + "network_all", + "network_ihsp", + "network_eip", + "NY_albany", + "NY_rochester", + ] + + # zip() stops at the shortest input, so a short tuple would silently skip trailing cases. + self.assertTrue(len(geos) == len(networks) == len(expected_list)) + for (geo, network), expected in zip(zip(geos, networks), expected_list): + self.assertEqual(metadata_fetcher._location_name_to_abbr(geo, network), expected) + + @patch(__test_target__ + ".fetch_json") + def test_get_data(self, MockFlusurvLocation): + MockFlusurvLocation.return_value = location_api_result + + season_api_fetcher = api_fetcher + season_api_fetcher.metadata.seasonids = [30, 49] + + self.assertEqual(season_api_fetcher.get_data("network_all"), by_epiweek_example_data) + + @patch(__test_target__ + ".fetch_json") + def test_fetch_flusurv_location(self, MockFlusurvLocation): + # API returns normal result + MockFlusurvLocation.return_value = location_api_result + self.assertEqual(api_fetcher._fetch_flusurv_location("network_all"), location_api_result) + + # API returns empty result formatted normally + empty_expected_result = {"default_data": []} + MockFlusurvLocation.return_value = empty_expected_result + with self.assertWarnsRegex(Warning, "No data was returned from the API for network_all"): + empty_data_result = api_fetcher._fetch_flusurv_location("network_all") + self.assertEqual(empty_data_result, empty_expected_result) + + # API returns "no data" result + MockFlusurvLocation.return_value = {"default_data": {"response": "No Data"}} + with self.assertWarnsRegex(Warning, "No data was returned from the API for network_all"): + no_data_result = api_fetcher._fetch_flusurv_location("network_all") + self.assertEqual(no_data_result, empty_expected_result) + + def test_group_by_epiweek(self): + self.assertEqual(api_fetcher._group_by_epiweek(metadata_result), by_epiweek_example_data) + + duplicate_input_data = { + 'default_data':
[ + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 1, 'sexid': 0, 'raceid': 0, 'flutype': 0, 'rate': 42, 'weeklyrate': 0.5, 'mmwrid': 2519}, + {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 1, 'sexid': 0, 'raceid': 0, 'flutype': 0, 'rate': 42, 'weeklyrate': 54, 'mmwrid': 2519}, + ] + } + + with self.assertWarnsRegex(Warning, "warning: Multiple rates seen for 201014"): + api_fetcher._group_by_epiweek(duplicate_input_data) + + self.assertEqual( + api_fetcher._group_by_epiweek({"default_data": []}), + defaultdict(lambda: defaultdict(lambda: None)) + ) + + @patch('builtins.print') + def test_group_by_epiweek_print_msgs(self, mock_print): + api_fetcher._group_by_epiweek(metadata_result) + mock_print.assert_called_with("found data for 4 epiweeks") + + def test_groupids_to_name(self): + ids = ( + (1, 0, 0, 0), + (9, 0, 0, 0), + (0, 2, 0, 0), + (0, 0, 3, 0), + (0, 0, 5, 0), + (0, 0, 0, 0), + (0, 0, 0, 2), + (0, 0, 0, 1), + (13, 0, 0, 0), + (97, 0, 0, 0), + (999, 0, 0, 0), + (0, 0, 111, 0), + ) + expected_list = [ + "rate_age_0", + "rate_age_7", + "rate_sex_female", + "rate_race_hisp", + "rate_race_natamer", + "rate_overall", + "rate_flu_b", + "rate_flu_a", + "rate_age_0tlt1", + "rate_age_lt18", + "rate_age_999", + "rate_race_111", + ] + + for (ageid, sexid, raceid, fluid), expected in zip(ids, expected_list): + self.assertEqual(api_fetcher._groupid_to_name(ageid, sexid, raceid, fluid), expected) + + with self.assertRaisesRegex(ValueError, "Ageid cannot be 6"): + api_fetcher._groupid_to_name(6, 0, 0, 0) + # One assertRaisesRegex per call: the context block exits at the first exception raised. + for groupids in ((1, 1, 0, 0), (0, 1, 1, 0), (1, 1, 1, 1)): + with self.assertRaisesRegex(ValueError, "Expect at least three of four group ids to be 0"): + api_fetcher._groupid_to_name(*groupids)