Coverage for censusdis/impl/fetch.py: 97%

75 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-04-03 05:39 +0000

1# Copyright (c) 2022 Darren Erik Vengroff 

2"""Utilities for loading census data.""" 

3from logging import getLogger 

4from typing import Any, Mapping, Optional, Union, Tuple 

5 

6import pandas as pd 

7import requests 

8 

9from censusdis.impl.exceptions import CensusApiException 

10 

11logger = getLogger(__name__) 

12 

13 

class _CertificateManager:
    """Manage the certificates and verification flags used when we make calls to the U.S. Census servers."""

    # Class-level flag recording whether an instance has already been constructed.
    # It has to be written through the class rather than through `self`: assigning via
    # `self` only creates an instance attribute, leaving this flag `False` forever.
    _have_a_singletop_certificate_manager = False

    def __init__(
        self,
        *,
        data_verify: Union[bool, str] = True,
        data_cert: Optional[Union[str, Tuple[str, str]]] = None,
        map_verify: Union[bool, str] = True,
        map_cert: Optional[Union[str, Tuple[str, str]]] = None,
    ):
        """
        Manage the certificates and verification flags used when we make calls to the U.S. Census servers.

        Parameters
        ----------
        data_verify
            Value to pass to `requests.get` in the `verify=` argument for data API calls to `https://api.census.gov`.
        data_cert
            Value to pass to `requests.get` in the `cert=` argument for data API calls to `https://api.census.gov`.
        map_verify
            Value to pass to `requests.get` in the `verify=` argument for getting map data with calls to
            `https://www2.census.gov`.
        map_cert
            Value to pass to `requests.get` in the `cert=` argument for getting map data with calls to
            `https://www2.census.gov`.

        Raises
        ------
        ValueError
            If an instance of this class has already been constructed.
        """
        if self._have_a_singletop_certificate_manager:
            raise ValueError(
                "Cannot create a CertificateManager. Please use `censusdis.data.certificates`."
            )

        self._data_verify = data_verify
        self._data_cert = data_cert
        self._map_verify = map_verify
        self._map_cert = map_cert

        # Record the construction on the class itself so that the check above
        # sees it the next time someone tries to build another instance.
        _CertificateManager._have_a_singletop_certificate_manager = True

    @property
    def data_verify(self) -> Union[bool, str]:
        """Value to pass to `requests.get` in the `verify=` argument for data from `https://api.census.gov`."""
        return self._data_verify

    @property
    def data_cert(self) -> Union[str, Tuple[str, str], None]:
        """Value to pass to `requests.get` in the `cert=` argument for data from `https://api.census.gov`."""
        return self._data_cert

    @property
    def map_verify(self) -> Union[bool, str]:
        """Value to pass to `requests.get` in the `verify=` argument for maps from `https://www2.census.gov`."""
        return self._map_verify

    @property
    def map_cert(self) -> Union[str, Tuple[str, str], None]:
        """Value to pass to `requests.get` in the `cert=` argument for maps from `https://www2.census.gov`."""
        return self._map_cert

    def use(
        self,
        *,
        data_verify: Union[bool, str] = True,
        data_cert: Optional[Union[str, Tuple[str, str]]] = None,
        map_verify: Union[bool, str] = True,
        map_cert: Optional[Union[str, Tuple[str, str]]] = None,
    ) -> "_CertificateManagerContext":
        """
        Set certificates and verification flags globally or within a context.

        If you want to set up certificate handling globally, you can just call this
        method alone, for example::

            import censusdis.data as ced

            ced.certificates.use(data_verify=False, map_verify=False)

        will turn off certificate verification for all data and map calls. This can be useful
        in a notebook environment, where you want to set up how certificates are handled once
        at the top of the notebook.

        If you want the effects to only be temporary, you can use a context manager with a `with`
        statement as follows::

            import censusdis.data as ced

            with ced.certificates.use(data_verify=False, map_verify=False):
                # No verification will be performed here.
                df = ced.download(...)

            # Upon exiting the context, verification is back on.
            df = ced.download(...)

        Note that all four settings are replaced on every call; any argument that is
        not given is reset to its default.

        Parameters
        ----------
        data_verify
            Value to pass to `requests.get` in the `verify=` argument for data API calls to `https://api.census.gov`.
        data_cert
            Value to pass to `requests.get` in the `cert=` argument for data API calls to `https://api.census.gov`.
        map_verify
            Value to pass to `requests.get` in the `verify=` argument for getting map data with calls to
            `https://www2.census.gov`.
        map_cert
            Value to pass to `requests.get` in the `cert=` argument for getting map data with calls to
            `https://www2.census.gov`.

        Returns
        -------
            A context object holding the settings that were in effect before this call.
            When used in a `with` statement, it puts them back on exit.
        """
        # The context must be created first: it snapshots the current settings
        # before they are overwritten below.
        context = _CertificateManagerContext(self)
        self._data_verify = data_verify
        self._data_cert = data_cert
        self._map_verify = map_verify
        self._map_cert = map_cert
        return context

128 

129 

class _CertificateManagerContext:
    """
    Remember a certificate manager's settings and put them back on context exit.

    The settings are captured when this object is constructed, not when the
    `with` block is entered.
    """

    def __init__(self, certificate_manager: "_CertificateManager"):
        """
        Capture the current settings of a certificate manager.

        Parameters
        ----------
        certificate_manager
            The manager whose `data_verify`, `data_cert`, `map_verify` and `map_cert`
            values are saved now and written back in `__exit__`.
        """
        self._certificate_manager = certificate_manager
        self._data_verify = certificate_manager.data_verify
        self._data_cert = certificate_manager.data_cert
        self._map_verify = certificate_manager.map_verify
        self._map_cert = certificate_manager.map_cert

    def __enter__(self) -> "_CertificateManagerContext":
        """Enter the context; return this object so `with ... as ctx` binds something usable."""
        return self

    def __exit__(self, type_, value, traceback) -> None:
        """
        Write the saved settings back to the certificate manager.

        Nothing is returned, so an exception raised inside the `with` block
        still propagates after the settings have been restored.
        """
        self._certificate_manager._data_verify = self._data_verify
        self._certificate_manager._data_cert = self._data_cert
        self._certificate_manager._map_verify = self._map_verify
        self._certificate_manager._map_cert = self._map_cert

146 

147 

# Module-level manager instance. `json_from_url` in this module reads `data_cert`
# and `data_verify` from it each time it issues a request.
certificates = _CertificateManager()
"""
A container for the certificates and verification flags used when we make calls to the U.S. Census servers.

Unless you are working behind a security proxy or firewall that manipulates certificates in
some way, you will never have to use this.

If you would not normally use the `verify=` or `cert=` arguments when using `requests.get` then
you need not worry about this. If you would, then use the values you would pass for accessing
`https://api.census.gov` or `https://www2.census.gov`.
"""

159 

160 

def json_from_url(url: str, params: Optional[Mapping[str, str]] = None) -> Any:
    """
    Issue a GET request and return the response body decoded as JSON.

    The request is made with the `cert=` and `verify=` values currently held
    by the module-level `certificates` object.

    Parameters
    ----------
    url
        The URL to request.
    params
        Optional query parameters, passed to `requests.get` as `params=`.

    Returns
    -------
        The decoded JSON payload.

    Raises
    ------
    CensusApiException
        If the response status is not 200, or if the status is 200 but the
        body cannot be decoded as JSON (including the invalid-key page).
    """
    response = requests.get(
        url,
        params=params,
        cert=certificates.data_cert,
        verify=certificates.data_verify,
    )

    if response.status_code != 200:
        # Do our best to tell the user something informative.
        message = f"Census API request to {response.url} failed with status {response.status_code}. {response.text}"
        logger.debug(message)
        raise CensusApiException(message)

    try:
        return response.json()
    except requests.exceptions.JSONDecodeError:
        body = response.text
        logger.debug(f"API call got 200 with unparseable JSON:\n{body}")

        # A 200 response carrying this sentence is how a rejected API key shows up.
        invalid_key_marker = (
            "You included a key with this request, however, it is not valid."
        )
        if invalid_key_marker in body:
            message = f"Census API request to {response.url} failed because your key is invalid."
        else:
            message = f"Census API request to {response.url} failed. Unable to parse returned JSON:\n{body}"
        raise CensusApiException(message)

186 

187 

def data_from_url(url: str, params: Optional[Mapping[str, str]] = None) -> pd.DataFrame:
    """
    Download JSON from a URL and turn it into a data frame.

    Parameters
    ----------
    url
        The URL to request.
    params
        Optional query parameters for the request.

    Returns
    -------
        The data frame built by `_df_from_census_json` from the payload
        that `json_from_url` returned.
    """
    logger.info(f"Downloading data from {url} with {params}.")

    return _df_from_census_json(json_from_url(url, params))

195 

196 

def _df_from_census_json(parsed_json):
    """
    Build a data frame from a list-of-lists JSON payload.

    The first inner list supplies the column names and the remaining inner
    lists supply the rows. Column names are upper-cased; spaces, hyphens and
    slashes become underscores; parentheses are dropped.

    Parameters
    ----------
    parsed_json
        The decoded JSON payload.

    Returns
    -------
        A data frame with one row per inner list after the first.

    Raises
    ------
    CensusApiException
        If the payload is not a non-empty list whose first element is a list.
    """
    is_list_of_lists = (
        isinstance(parsed_json, list)
        and len(parsed_json) >= 1
        and isinstance(parsed_json[0], list)
    )
    if not is_list_of_lists:
        raise CensusApiException(
            f"Expected json data to be a list of lists, not a {type(parsed_json)}"
        )

    header, *rows = parsed_json

    # One translation pass: three characters map to "_" and two are deleted.
    cleanup = str.maketrans({" ": "_", "-": "_", "/": "_", "(": None, ")": None})
    column_names = [name.upper().translate(cleanup) for name in header]

    return pd.DataFrame(rows, columns=column_names)