Coverage for utils/symbolic.py: 98%

42 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-04-03 05:39 +0000

# Copyright (c) 2022 Darren Erik Vengroff

"""
Utilities for creating symbolic names.

This module processes data sets from the US Census
and generates their respective symbolic names for
documentation purposes.
"""

10 

import argparse
from datetime import datetime
from pathlib import Path

import censusdis.data as ced

17 

class symbolic:
    """
    A generator of datasets' symbolic names file.

    This creates symbolic names for datasets based on
    dataset names. The symbolic names are stored as
    dictionary keys with values of the dataset names
    and url.

    Users will use this to generate most up to date
    dataset documentation file.
    """

    def __init__(self):
        # Maps SYMBOLIC_NAME -> [quoted dataset name, quoted url]. The
        # values are pre-quoted because they are written verbatim into a
        # generated Python module.
        self.dictionary = {}
        # Header text written at the top of the generated module.
        self.module_message = (
            f"""# Copyright (c) {datetime.now().year} Darren Erik Vengroff\n"""
            """\"\"\"
Auto-generated module. It should not be edited directly.

This module contains abbreviated names for commonly used data sets.

These are typically used as the first argument to :py:func:`censusdis.data.download`.

The Census Bureau routinely adds new datasets, so there may be more data sets available
than there are symbolic names here. However, we have automated the process of generating
these symbolic names so they should almost always be very close to up to date.

But you can always use raw strings. For example, even for `ACS5` you can use

`acs/acs5` instead.
\"\"\"
"""
        )

    # Month abbreviations used to recognize monthly data sets such as
    # "cps/basic/jan".
    MONTHS = [
        "jan",
        "feb",
        "mar",
        "apr",
        "may",
        "jun",
        "jul",
        "aug",
        "sep",
        "oct",
        "nov",
        "dec",
    ]

    def store_dataset(self, dataset_list: list, url_list: list):
        """
        Construct symbolic names and store as keys mapping to values of dataset and url.

        Parameters
        ----------
        dataset_list
            List of dataset names. Used to construct symbolic
            names and stored as value of symbolic name.

        url_list
            List of dataset urls. Stored as value of symbolic name.

        Returns
        -------
        A dictionary storing the symbolic names of unique data sets
        that are available.
        """
        for item, link in zip(dataset_list, url_list):
            quoted_item, quoted_link = f'"{item}"', f'"{link}"'

            # Skip data sets we have already stored. (The previous check
            # compared the raw name against the [name, url] list values,
            # so it could never match; compare against the stored quoted
            # dataset name instead.)
            if any(value[0] == quoted_item for value in self.dictionary.values()):
                continue

            components = item.split("/")
            # Different cases of naming according to dataset names like 'acs/acs5'
            # and special cases for clearer names
            if len(components) == 1:
                if components[0][:3] in ("ecn", "abs"):
                    # E.g. "ecnbasic" -> "ECN_BASIC".
                    name = components[0][:3].upper() + "_" + components[0][3:].upper()
                elif components[0] == "surname":
                    name = "DECENNIAL_SURNAME"
                elif components[0] == "pubschlfin":
                    name = "PUBLIC_PK12_EDUCATION_FINANCE"
                else:
                    name = components[0].upper()
            elif len(components) == 2:
                if components[0][:3] == components[1][:3]:
                    # Redundant prefix, e.g. "acs/acs5" -> "ACS5".
                    if components[0] == "popproj":
                        name = components[0].upper()
                    else:
                        name = components[1].upper()
                else:
                    # Special cases for clearer names.
                    if components[0] == "dec":
                        components[0] = "decennial"
                    if components[1] == "pl":
                        components[1] = "PUBLIC_LAW_94_171"
                    name = "_".join(components).upper()
            elif len(components) == 3 and components[2] in self.MONTHS:
                # This is the case for monthly data sets.
                name = "_".join(components).upper()
            else:
                if components[0][:3] == components[1][:3]:
                    name = "_".join(components[1:]).upper()
                else:
                    name = "_".join(components[:2]).upper()

            self.dictionary[name] = [quoted_item, quoted_link]

        return self.dictionary

    def write_file(self, destination_file: str):  # pragma: no cover
        """
        Write symbolic names dictionary content into destination file.

        Parameters
        ----------
        destination_file
            The target file for storing the datasets' symbolic names.
        """
        with open(destination_file, "w") as destfile:
            destfile.write(self.module_message)
            destfile.write("\n")

            # One `NAME = "dataset"` assignment per symbolic name.
            for key in sorted(self.dictionary.keys()):
                destfile.write("\n")
                destfile.write(key + " = " + self.dictionary[key][0])
                destfile.write("\n")

            destfile.write("\n")
            destfile.write("\nDATASET_REFERENCE_URLS = {\n")

            # Map each symbolic name to its API reference URL.
            for key in sorted(self.dictionary.keys()):
                value = self.dictionary[key][1]
                destfile.write(f"    {key}: {value},\n")
            destfile.write("}\n")

            # Write LODES. These are not in the Census data set catalog,
            # so their symbols are generated from known naming patterns.
            destfile.write(
                """
# LODES are special data sets with their own base URL.

"""
            )

            lodes_symbols = []

            # LODES origin-destination (OD) data sets.
            for part in ["main", "aux"]:
                for job_type in [f"JT0{ii}" for ii in range(6)]:
                    symbol = f"LODES_OD_{part.upper()}_{job_type}"
                    destfile.write(
                        f'{symbol} = "lodes/od/{part}/{job_type.lower()}"\n\n'
                    )
                    lodes_symbols.append(symbol)

            # LODES residence (RAC) and workplace (WAC) area characteristics.
            for dataset_kind in ["rac", "wac"]:
                for segment in [
                    "S000",
                    "SA01",
                    "SA02",
                    "SA03",
                    "SE01",
                    "SE02",
                    "SE03",
                    "SI01",
                    "SI02",
                    "SI03",
                ]:
                    for job_type in [f"JT0{ii}" for ii in range(6)]:
                        symbol = (
                            f"LODES_{dataset_kind.upper()}_{segment.upper()}_{job_type}"
                        )
                        destfile.write(
                            f'{symbol} = "lodes/{dataset_kind}/{segment.lower()}/{job_type.lower()}"\n\n'
                        )
                        lodes_symbols.append(symbol)

            destfile.write("\nALL_LODES_DATA_SETS = [\n")
            for symbol in lodes_symbols:
                destfile.write(f"    {symbol},\n")
            destfile.write("]\n")

199 

200 

def main():  # pragma: no cover
    """
    Generate a new version of datasets.py.

    Parses the destination file name from the command line, downloads the
    catalog of all available Census data sets, builds symbolic names for
    them, and writes the generated module into the ``censusdis/`` package
    directory.
    """
    # Parse arguments first so a bad invocation fails fast, before the
    # (slow, networked) Census catalog download.
    parser = argparse.ArgumentParser(description="Get destination file name.")
    parser.add_argument(
        "filename",
        metavar="filename",
        type=str,
        help="a file name for the symbolic name destination file",
    )
    args = parser.parse_args()

    # Fetch the full catalog of data sets from the Census API.
    df_datasets = ced.variables.all_data_sets()
    dataset_names = df_datasets["DATASET"].to_list()
    dataset_urls = df_datasets["API BASE URL"].to_list()

    symbol_generator = symbolic()
    symbol_generator.store_dataset(dataset_names, dataset_urls)

    # The generated module always lives inside the censusdis package.
    destination_path = Path("censusdis", args.filename)
    symbol_generator.write_file(destination_path)

    print("Generated " + args.filename + " file successfully.")

223 

224 

# Script entry point: regenerate the symbolic data set names module.
if __name__ == "__main__":
    main()