Coverage for censusdis/impl/varsource/base.py: 78%
18 statements
« prev ^ index » next coverage.py v6.5.0, created at 2025-04-03 05:39 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2025-04-03 05:39 +0000
1# Copyright (c) 2022 Darren Erik Vengroff
2"""
3Abstract base class for variable sources.
5The two concrete implementations are a mock
6version for testing and a version that loads
7from the U.S. Census API.
8"""
10from abc import ABC, abstractmethod
11from typing import Any, Dict, List, Literal, Optional, Union
# Alias accepted wherever a dataset vintage is specified: either an integer
# year or the literal string "timeseries".
VintageType = Union[int, Literal["timeseries"]]
"""
The type we use to specify the vintage of a dataset.

Most datasets are organized by year, so we pass an integer
year like 2020. But some datasets are timeseries that cover
multiple years, so we specify the literal value "timeseries".
"""
class VariableSource(ABC):
    """
    A source of variables, typically used behind a :py:class:`~VariableCache`.

    The purpose of this class is to get variable and group information
    from a source, typically a remote API call to the US Census API.
    Another use case is to enable mocking for testing the rest of the
    :py:class:`~VariableCache` functionality, which is a superset of
    what this class does.
    """

    @abstractmethod
    def get(
        self,
        dataset: str,
        year: int,
        name: str,
    ) -> Dict[str, Any]:
        """
        Get information on a variable for a given dataset in a given year.

        The return value is a dictionary with the following fields:

        .. list-table:: Fields describing a variable
            :widths: 25 75
            :header-rows: 0

            * - `"name"`
              - The name of the variable.
            * - `"label"`
              - A description of the variable. Within groups, hierarchies of
                variables are represented by separating levels with `"!!"`.
            * - `"concept"`
              - The concept this variable and others in the group represent.
            * - `"group"`
              - The group the variable belongs to. To query an entire group,
                use the :py:meth:`~get_group` method.
            * - `"limit"`
              -
            * - `"attributes"`
              - A comma-separated list of variables that are attributes of this
                one.

        This dictionary is very much like the JSON returned from US Census
        API URLs like
        https://api.census.gov/data/2020/acs/acs5/variables/B03001_001E.json

        Parameters
        ----------
        dataset
            The census dataset, for example `acs/acs5` for ACS5 data
            (https://www.census.gov/data/developers/data-sets/acs-5year.html and
            https://api.census.gov/data/2020/acs/acs5.html)
            or `dec/pl` for redistricting data
            (https://www.census.gov/programs-surveys/decennial-census/about/rdo.html and
            https://api.census.gov/data/2020/dec/pl.html)
        year
            The year
        name
            The name of the variable to get information about. For example,
            `B03002_001E` is a variable from the ACS5 data set that represents
            total population in a geographic area.

        Returns
        -------
            A dictionary of information about the variable.
        """
        raise NotImplementedError("Abstract method.")

    @abstractmethod
    def get_group(
        self,
        dataset: str,
        year: int,
        name: str,
    ) -> Dict[str, Dict]:
        """
        Get information on a group of variables for a given dataset in a given year.

        The return value is a dictionary that is very much like the JSON returned
        from US Census API URLs like
        https://api.census.gov/data/2020/acs/acs5/groups/B03002.json

        See :py:meth:`~VariableSource.get` for more details.

        Parameters
        ----------
        dataset
            The census dataset, for example `acs/acs5` for ACS5 data
            (https://www.census.gov/data/developers/data-sets/acs-5year.html and
            https://api.census.gov/data/2020/acs/acs5.html)
            or `dec/pl` for redistricting data
            (https://www.census.gov/programs-surveys/decennial-census/about/rdo.html and
            https://api.census.gov/data/2020/dec/pl.html)
        year
            The year
        name
            The name of the group to get information about. For example,
            `B03002` is a group from the ACS5 data set that contains
            variables that represent the population of various racial and
            ethnic groups in a geographic area.

        Returns
        -------
            A dictionary with a single key `"variables"`. The value
            associated with that key is a dictionary that maps from the
            names of variables in the group to dictionaries of attributes
            of the variable, in the same form as that returned for individual
            variables by the method :py:meth:`~VariableSource.get`.
        """
        raise NotImplementedError("Abstract method.")

    @abstractmethod
    def get_all_groups(self, dataset: str, year: int) -> Dict[str, List]:
        """
        Get information on all groups of variables for a given dataset in a given year.

        The return value is a dictionary that is very much like the JSON returned
        from US Census API URLs like
        https://api.census.gov/data/2020/acs/acs5/groups.json

        See :py:meth:`~VariableSource.get_group` for details on querying
        a single group.

        Parameters
        ----------
        dataset
            The census dataset, for example `acs/acs5` for ACS5 data
            (https://www.census.gov/data/developers/data-sets/acs-5year.html and
            https://api.census.gov/data/2020/acs/acs5.html)
            or `dec/pl` for redistricting data
            (https://www.census.gov/programs-surveys/decennial-census/about/rdo.html and
            https://api.census.gov/data/2020/dec/pl.html)
        year
            The year

        Returns
        -------
            A dictionary with a single key `"groups"`. The value
            associated with that key is a list containing, for each
            group, a dictionary of attributes of that group.
        """
        raise NotImplementedError("Abstract method.")

    @abstractmethod
    def get_datasets(self, year: Optional[int]) -> Dict[str, Any]:
        """
        Get descriptions of all the datasets available for a given year.

        Parameters
        ----------
        year
            The year. If `None`, get all datasets for all years.

        Returns
        -------
            A dictionary with a key "datasets". The value associated
            with that key is a dictionary that maps from the names
            of data sets to dictionaries of attributes of each data
            set.
        """
        raise NotImplementedError("Abstract method.")