Coverage for censusdis/impl/varsource/censusapi.py: 100%

45 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-04-03 05:39 +0000

1# Copyright (c) 2022 Darren Erik Vengroff 

2"""A variable source that loads metadata about variables from the U.S. Census API.""" 

3 

4from typing import Any, Dict, List, Optional 

5 

6from censusdis.impl.fetch import json_from_url 

7from censusdis.impl.varsource.base import VariableSource 

8 

9 

class CensusApiVariableSource(VariableSource):
    """
    A :py:class:`~VariableSource` backed by the remote U.S. Census API.

    Every lookup is answered by building a URL under
    ``https://api.census.gov/data`` and fetching JSON from it.

    Users will rarely if ever need to explicitly construct objects
    of this class. There is one behind the singleton cache
    `censusdis.censusdata.variables`.
    """

    @staticmethod
    def _url_part(dataset: str, year: int):
        # An integer year is placed in front of the dataset in the path.
        # Any other kind of value is left out of the path entirely.
        if isinstance(year, int):
            return f"{year}/{dataset}"

        return f"{dataset}"

    @staticmethod
    def variables_url(dataset: str, year: int, response_format: str = "json") -> str:
        """
        Build the URL that lists metadata for every variable in a dataset.

        Parameters
        ----------
        dataset
            The census dataset.
        year
            The year. If it is not an `int`, it is omitted from the URL.
        response_format
            The extension appended to the URL. Either `json` (the default)
            or `html`.

        Returns
        -------
            The URL to fetch the metadata from.
        """
        path = CensusApiVariableSource._url_part(dataset, year)

        return f"https://api.census.gov/data/{path}/variables.{response_format}"

    @staticmethod
    def url(dataset: str, year: int, name: str, response_format: str = "json") -> str:
        """
        Build the URL that describes a single variable.

        This is where we fetch metadata that is then put into the
        local cache.

        Parameters
        ----------
        dataset
            The census dataset.
        year
            The year. If it is not an `int`, it is omitted from the URL.
        name
            The name of the variable.
        response_format
            The extension appended to the URL. Either `json` (the default)
            or `html`.

        Returns
        -------
            The URL to fetch the metadata from.
        """
        path = CensusApiVariableSource._url_part(dataset, year)

        return f"https://api.census.gov/data/{path}/variables/{name}.{response_format}"

    @staticmethod
    def group_url(
        dataset: str,
        year: int,
        group_name: Optional[str] = None,
    ) -> str:
        """
        Build the URL that describes a group of variables.

        With no group name, the URL covers all the variables in the
        dataset. With a group name, it covers just the variables in
        that group.

        Some datasets, `dec/pl` for example, do not have groups, so a
        group name need not be passed. Others, like `acs/acs5`, have
        groups, so a group name such as `B01001` will normally be
        passed in.

        Parameters
        ----------
        dataset
            The census dataset.
        year
            The year. If it is not an `int`, it is omitted from the URL.
        group_name
            The name of the group, or `None` if the dataset has no
            groups.

        Returns
        -------
            The URL to fetch the metadata from.
        """
        path = CensusApiVariableSource._url_part(dataset, year)

        if group_name is not None:
            return f"https://api.census.gov/data/{path}/groups/{group_name}.json"

        return f"https://api.census.gov/data/{path}/variables.json"

    @staticmethod
    def all_groups_url(dataset: str, year: int) -> str:
        """
        Build the URL that lists all the groups in a dataset.

        Parameters
        ----------
        dataset
            The census dataset.
        year
            The year. If it is not an `int`, it is omitted from the URL.

        Returns
        -------
            The URL to fetch the metadata from.
        """
        path = CensusApiVariableSource._url_part(dataset, year)

        return f"https://api.census.gov/data/{path}/groups.json"

    def get(self, dataset: str, year: int, name: str) -> Dict[str, Any]:
        """Fetch the metadata for one variable from the U.S. Census API."""
        return json_from_url(self.url(dataset, year, name))

    def get_group(
        self, dataset: str, year: int, name: Optional[str]
    ) -> Dict[str, Dict]:
        """Fetch the metadata for a group of variables from the U.S. Census API."""
        metadata = json_from_url(self.group_url(dataset, year, name))

        # Pseudo-variables such as 'for' and 'in' are not real variables,
        # so they are dropped from the result.
        pseudo_variables = ("in", "for", "ucgid")

        kept = {}
        for variable_name, details in metadata["variables"].items():
            if variable_name in pseudo_variables:
                continue

            # The group API leaves the name out of the nested dictionaries.
            # Add it so each entry looks the same as if it had come from
            # the single-variable API.
            details["name"] = variable_name
            kept[variable_name] = details

        metadata["variables"] = kept

        return metadata

    def get_all_groups(self, dataset: str, year: int) -> Dict[str, List]:
        """Fetch the list of all groups in a dataset from the U.S. Census API."""
        return json_from_url(self.all_groups_url(dataset, year))

    def get_datasets(self, year: Optional[int]) -> Dict[str, Any]:
        """Fetch the catalog of datasets, optionally for one year, from the U.S. Census API."""
        if year is None:
            catalog_url = "https://api.census.gov/data.json"
        else:
            catalog_url = f"https://api.census.gov/data/{year}.json"

        return json_from_url(catalog_url)

181 

182 return json