Coverage for censusdis/impl/varsource/censusapi.py: 100%
45 statements
« prev ^ index » next coverage.py v6.5.0, created at 2025-04-03 05:39 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2025-04-03 05:39 +0000
1# Copyright (c) 2022 Darren Erik Vengroff
2"""A variable source that loads metadata about variables from the U.S. Census API."""
4from typing import Any, Dict, List, Optional
6from censusdis.impl.fetch import json_from_url
7from censusdis.impl.varsource.base import VariableSource
class CensusApiVariableSource(VariableSource):
    """
    A :py:class:`~VariableSource` backed by the remote U.S. Census API.

    Each lookup builds a URL under ``https://api.census.gov/data`` and
    hands it to `json_from_url` to retrieve the metadata.

    Explicit construction is rarely if ever necessary. One instance
    sits behind the singleton cache `censusdis.censusdata.variables`.
    """

    @staticmethod
    def _url_part(dataset: str, year: int) -> str:
        """
        Build the path fragment that identifies a dataset in API URLs.

        Parameters
        ----------
        dataset
            The census dataset.
        year
            The year. When this is not an `int`, it is left out of
            the fragment entirely.
            NOTE(review): non-integer values are presumably used for
            datasets that are not organized by year; confirm with callers.

        Returns
        -------
            ``{year}/{dataset}`` for an integer year, otherwise just
            ``{dataset}``.
        """
        if isinstance(year, int):
            return f"{year}/{dataset}"

        return f"{dataset}"

    @staticmethod
    def variables_url(dataset: str, year: int, response_format: str = "json") -> str:
        """
        Build the URL that serves metadata on every variable in a dataset.

        Parameters
        ----------
        dataset
            The census dataset.
        year
            The year
        response_format
            The desired format of the response. Either `json` (the default)
            or `html`.

        Returns
        -------
            The URL to fetch the metadata from.
        """
        dataset_path = CensusApiVariableSource._url_part(dataset, year)
        return f"https://api.census.gov/data/{dataset_path}/variables.{response_format}"

    @staticmethod
    def url(dataset: str, year: int, name: str, response_format: str = "json") -> str:
        """
        Build the URL that serves metadata on a single variable.

        This is the location queried for metadata that is subsequently
        stored in the local cache.

        Parameters
        ----------
        dataset
            The census dataset.
        year
            The year
        name
            The name of the variable.
        response_format
            The desired format of the response. Either `json` (the default)
            or `html`.

        Returns
        -------
            The URL to fetch the metadata from.
        """
        dataset_path = CensusApiVariableSource._url_part(dataset, year)
        return (
            f"https://api.census.gov/data/{dataset_path}/"
            f"variables/{name}.{response_format}"
        )

    @staticmethod
    def group_url(
        dataset: str,
        year: int,
        group_name: Optional[str] = None,
    ) -> str:
        """
        Build the URL that serves metadata on a group of variables.

        With a group name, the URL covers just the variables in that
        group. Without one, it covers all the variables in the dataset.

        Not every dataset has groups. `dec/pl`, for example, has none,
        so no group name need be given for it. Others, such as
        `acs/acs5`, do have groups, and a name like `B01001` will
        normally be supplied.

        Parameters
        ----------
        dataset
            The census dataset.
        year
            The year
        group_name
            The name of the group, or `None` if the dataset has no
            groups.

        Returns
        -------
            The URL to fetch the metadata from.
        """
        base = f"https://api.census.gov/data/{CensusApiVariableSource._url_part(dataset, year)}"

        if group_name is not None:
            return f"{base}/groups/{group_name}.json"

        # No group requested: fall back to the listing of all variables.
        return f"{base}/variables.json"

    @staticmethod
    def all_groups_url(dataset: str, year: int) -> str:
        """
        Build the URL that serves the names of all groups in a dataset.

        Parameters
        ----------
        dataset
            The census dataset.
        year
            The year

        Returns
        -------
            The URL to fetch the metadata from.
        """
        dataset_path = CensusApiVariableSource._url_part(dataset, year)
        return f"https://api.census.gov/data/{dataset_path}/groups.json"

    def get(self, dataset: str, year: int, name: str) -> Dict[str, Any]:
        """Fetch metadata on one variable via a remote query to the U.S. Census API."""
        return json_from_url(self.url(dataset, year, name))

    def get_group(
        self, dataset: str, year: int, name: Optional[str]
    ) -> Dict[str, Dict]:
        """
        Fetch metadata on a group of variables via a remote query to the U.S. Census API.

        The pseudo-variables `in`, `for` and `ucgid` are removed from the
        result, and every remaining variable's dictionary gains a `name`
        entry holding its own key.
        """
        group = json_from_url(self.group_url(dataset, year, name))

        pseudo_variables = ("in", "for", "ucgid")

        kept = {}
        for variable_name, details in group["variables"].items():
            # Pseudo-variables such as 'for' and 'in' are not real variables.
            if variable_name in pseudo_variables:
                continue

            # The group API omits the name from the nested dictionaries;
            # add it so they look the same as if they had come from the
            # variable API.
            details["name"] = variable_name
            kept[variable_name] = details

        group["variables"] = kept

        return group

    def get_all_groups(self, dataset: str, year: int) -> Dict[str, List]:
        """Fetch the listing of all groups in a dataset via a remote query to the U.S. Census API."""
        return json_from_url(self.all_groups_url(dataset, year))

    def get_datasets(self, year: Optional[int]) -> Dict[str, Any]:
        """
        Fetch the listing of datasets via a remote query to the U.S. Census API.

        A year selects that year's listing (``data/{year}.json``). `None`
        selects the top-level ``data.json`` listing instead.
        """
        listing_url = (
            "https://api.census.gov/data.json"
            if year is None
            else f"https://api.census.gov/data/{year}.json"
        )

        return json_from_url(listing_url)