Coverage for censusdis/impl/fetch.py: 97%
75 statements
« prev ^ index » next coverage.py v6.5.0, created at 2025-04-03 05:39 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2025-04-03 05:39 +0000
1# Copyright (c) 2022 Darren Erik Vengroff
2"""Utilities for loading census data."""
3from logging import getLogger
4from typing import Any, Mapping, Optional, Union, Tuple
6import pandas as pd
7import requests
9from censusdis.impl.exceptions import CensusApiException
11logger = getLogger(__name__)
class _CertificateManager:
    """
    Manage the certificates and verification flags used when we make calls to the U.S. Census servers.

    Only one instance of this class may be created per process. A second
    attempt to construct one raises `ValueError`.
    """

    # Class-level flag recording whether the single permitted instance has
    # already been constructed.
    _have_a_singletop_certificate_manager = False

    def __init__(
        self,
        *,
        data_verify: Union[bool, str] = True,
        data_cert: Optional[Union[str, Tuple[str, str]]] = None,
        map_verify: Union[bool, str] = True,
        map_cert: Optional[Union[str, Tuple[str, str]]] = None,
    ):
        """
        Manage the certificates and verification flags used when we make calls to the U.S. Census servers.

        Parameters
        ----------
        data_verify
            Value to pass to `requests.get` in the `verify=` argument for data API calls to `https://api.census.gov`.
        data_cert
            Value to pass to `requests.get` in the `cert=` argument for data API calls to `https://api.census.gov`.
        map_verify
            Value to pass to `requests.get` in the `verify=` argument for getting map data with calls to
            `https://www2.census.gov`.
        map_cert
            Value to pass to `requests.get` in the `cert=` argument for getting map data with calls to
            `https://www2.census.gov`.

        Raises
        ------
        ValueError
            If an instance of this class has already been created.
        """
        if _CertificateManager._have_a_singletop_certificate_manager:
            raise ValueError(
                "Cannot create a CertificateManager. Please use `censusdis.data.certificates`."
            )

        self._data_verify = data_verify
        self._data_cert = data_cert
        self._map_verify = map_verify
        self._map_cert = map_cert

        # The flag has to be set on the class. Assigning through `self` would
        # only create an instance attribute, leaving the class-level flag
        # False, so the check above would never fire for later instances.
        _CertificateManager._have_a_singletop_certificate_manager = True

    @property
    def data_verify(self) -> Union[bool, str]:
        """Value to pass to `requests.get` in the `verify=` argument for data from `https://api.census.gov`."""
        return self._data_verify

    @property
    def data_cert(self) -> Union[str, Tuple[str, str], None]:
        """Value to pass to `requests.get` in the `cert=` argument for data from `https://api.census.gov`."""
        return self._data_cert

    @property
    def map_verify(self) -> Union[bool, str]:
        """Value to pass to `requests.get` in the `verify=` argument for maps from `https://www2.census.gov`."""
        return self._map_verify

    @property
    def map_cert(self) -> Union[str, Tuple[str, str], None]:
        """Value to pass to `requests.get` in the `cert=` argument for maps from `https://www2.census.gov`."""
        return self._map_cert

    def use(
        self,
        *,
        data_verify: Union[bool, str] = True,
        data_cert: Optional[Union[str, Tuple[str, str]]] = None,
        map_verify: Union[bool, str] = True,
        map_cert: Optional[Union[str, Tuple[str, str]]] = None,
    ) -> "_CertificateManagerContext":
        """
        Set certificates and verification flags globally or within a context.

        If you want to set up certificate handling globally, you can just call this
        method alone, for example::

            import censusdis.data as ced

            ced.certificates.use(data_verify=False, map_verify=False)

        will turn off certificate verification for all data and map calls. This can be useful
        in a notebook environment, where you want to set up how certificates are handled once
        at the top of the notebook.

        If you want the effects to only be temporary, you can use a context manager with a `with`
        statement as follows::

            import censusdis.data as ced

            with ced.certificates.use(data_verify=False, map_verify=False):
                # No verification will be performed here.
                df = ced.download(...)

            # Upon exiting the context, verification is back on.
            df = ced.download(...)

        All four settings are replaced on every call: any argument that is
        not passed is reset to its default rather than keeping its current
        value.

        Parameters
        ----------
        data_verify
            Value to pass to `requests.get` in the `verify=` argument for data API calls to `https://api.census.gov`.
        data_cert
            Value to pass to `requests.get` in the `cert=` argument for data API calls to `https://api.census.gov`.
        map_verify
            Value to pass to `requests.get` in the `verify=` argument for getting map data with calls to
            `https://www2.census.gov`.
        map_cert
            Value to pass to `requests.get` in the `cert=` argument for getting map data with calls to
            `https://www2.census.gov`.

        Returns
        -------
            A context object that restores the settings that were in effect
            before this call when its `with` block exits.
        """
        # Build the context first so it captures the settings in effect
        # before they are overwritten below.
        context = _CertificateManagerContext(self)
        self._data_verify = data_verify
        self._data_cert = data_cert
        self._map_verify = map_verify
        self._map_cert = map_cert
        return context
class _CertificateManagerContext:
    """
    Context manager that restores a certificate manager's earlier settings.

    The four settings are captured from the manager when this object is
    constructed and written back to the manager when the `with` block exits.
    """

    def __init__(self, certificate_manager: _CertificateManager):
        """Snapshot the current settings of `certificate_manager`."""
        self._certificate_manager = certificate_manager
        for setting in ("data_verify", "data_cert", "map_verify", "map_cert"):
            # Read through the manager's public attribute and keep the value
            # under the matching private name on this object.
            setattr(self, f"_{setting}", getattr(certificate_manager, setting))

    def __enter__(self):
        """Enter the context; there is nothing to set up and nothing is returned."""
        return None

    def __exit__(self, type_, value, traceback):
        """Write the saved settings back to the manager; exceptions are not suppressed."""
        manager = self._certificate_manager
        for attribute in ("_data_verify", "_data_cert", "_map_verify", "_map_cert"):
            setattr(manager, attribute, getattr(self, attribute))
certificates = _CertificateManager(
    data_verify=True,
    data_cert=None,
    map_verify=True,
    map_cert=None,
)
"""
Holder for the certificates and verification flags used when we make calls to the U.S. Census servers.

You will never need to touch this unless you are working behind a security proxy or firewall
that manipulates certificates in some way.

If you would not normally pass `verify=` or `cert=` to `requests.get`, you can ignore this.
If you would, supply the same values you would use when accessing
`https://api.census.gov` or `https://www2.census.gov`.
"""
def json_from_url(url: str, params: Optional[Mapping[str, str]] = None) -> Any:
    """
    Fetch a URL and return its body parsed as JSON.

    The request is made with `requests.get`, passing the `cert=` and `verify=`
    values currently held by the module-level `certificates` object.

    Parameters
    ----------
    url
        The URL to fetch.
    params
        Optional query parameters to pass to `requests.get`.

    Returns
    -------
        The parsed JSON body of the response.

    Raises
    ------
    CensusApiException
        If the response status is not 200, or if the status is 200 but the
        body cannot be parsed as JSON. The latter includes the case where the
        body says the supplied key is not valid.
    """
    response = requests.get(
        url,
        params=params,
        cert=certificates.data_cert,
        verify=certificates.data_verify,
    )

    if response.status_code != 200:
        # Do our best to tell the user something informative.
        message = f"Census API request to {response.url} failed with status {response.status_code}. {response.text}"
        logger.debug(message)
        raise CensusApiException(message)

    try:
        return response.json()
    except requests.exceptions.JSONDecodeError:
        logger.debug(f"API call got 200 with unparseable JSON:\n{response.text}")
        key_rejected = (
            "You included a key with this request, however, it is not valid."
            in response.text
        )
        if key_rejected:
            message = f"Census API request to {response.url} failed because your key is invalid."
        else:
            message = f"Census API request to {response.url} failed. Unable to parse returned JSON:\n{response.text}"
        raise CensusApiException(message)
def data_from_url(url: str, params: Optional[Mapping[str, str]] = None) -> pd.DataFrame:
    """
    Download JSON from a URL and convert it into a data frame.

    Parameters
    ----------
    url
        The URL to fetch.
    params
        Optional query parameters for the request.

    Returns
    -------
        A data frame built from the JSON that was returned.
    """
    logger.info(f"Downloading data from {url} with {params}.")

    return _df_from_census_json(json_from_url(url, params))
def _df_from_census_json(parsed_json):
    """
    Build a data frame from parsed JSON shaped as a list of lists.

    The first inner list supplies the column names and the remaining inner
    lists supply the rows. Column names are upper-cased; spaces, hyphens and
    slashes become underscores; parentheses are removed.

    Parameters
    ----------
    parsed_json
        The parsed JSON: a non-empty list whose first element is a list.

    Returns
    -------
        A data frame with one row per inner list after the first.

    Raises
    ------
    CensusApiException
        If `parsed_json` is not a non-empty list whose first element is a list.
    """
    is_table = (
        isinstance(parsed_json, list)
        and len(parsed_json) >= 1
        and isinstance(parsed_json[0], list)
    )
    if not is_table:
        raise CensusApiException(
            f"Expected json data to be a list of lists, not a {type(parsed_json)}"
        )

    header, *rows = parsed_json

    # One pass per name: " ", "-" and "/" map to "_", while "(" and ")" are dropped.
    cleanup = str.maketrans(" -/", "___", "()")
    column_names = [name.upper().translate(cleanup) for name in header]

    return pd.DataFrame(rows, columns=column_names)