Coverage for utils/symbolic.py: 98%
42 statements
coverage.py v6.5.0, created at 2025-04-03 05:39 +0000

# Copyright (c) 2022 Darren Erik Vengroff

"""
Utilities for creating symbolic names.

This module processes data sets from the US Census
and generates their respective symbolic names for
documentation purposes.
"""

from datetime import datetime
import argparse
from pathlib import Path

import censusdis.data as ced


class symbolic:
    """
    A generator of datasets' symbolic names file.

    This creates symbolic names for data sets based on
    their dataset names. The symbolic names are stored as
    dictionary keys whose values are the dataset names
    and URLs.

    Users can use this to generate an up-to-date
    dataset documentation file.
    """
    def __init__(self):
        self.dictionary = {}
        self.module_message = (
            f"""# Copyright (c) {datetime.now().year} Darren Erik Vengroff\n"""
            """\"\"\"
Auto-generated module. It should not be edited directly.

This module contains abbreviated names for commonly used data sets.

These are typically used as the first argument to :py:func:`censusdis.data.download`.

The Census Bureau routinely adds new data sets, so there may be more data sets available
than there are symbolic names here. However, we have automated the process of generating
these symbolic names, so they should almost always be very close to up to date.

But you can always use raw strings. For example, even for `ACS5` you can use
`acs/acs5` instead.
\"\"\"
"""
        )

    MONTHS = [
        "jan",
        "feb",
        "mar",
        "apr",
        "may",
        "jun",
        "jul",
        "aug",
        "sep",
        "oct",
        "nov",
        "dec",
    ]

    def store_dataset(self, dataset_list: list, url_list: list):
        """
        Construct symbolic names and store them as keys mapping to dataset names and URLs.

        Parameters
        ----------
        dataset_list
            List of dataset names. Used to construct the symbolic
            names and stored as part of each symbolic name's value.

        url_list
            List of dataset URLs. Stored as part of each symbolic name's value.

        Returns
        -------
        A dictionary storing the symbolic names of the unique data sets
        that are available.
        """
        for item, link in zip(dataset_list, url_list):
            if item not in self.dictionary.values():
                components = item.split("/")
                # Different cases of naming according to dataset names like 'acs/acs5'
                # and special cases for clearer names.
                if len(components) == 1:
                    if components[0][:3] == "ecn" or components[0][:3] == "abs":
                        name = (
                            components[0][:3].upper() + "_" + components[0][3:].upper()
                        )
                    elif components[0] == "surname":
                        name = "DECENNIAL_SURNAME"
                    elif components[0] == "pubschlfin":
                        name = "PUBLIC_PK12_EDUCATION_FINANCE"
                    else:
                        name = components[0].upper()
                elif len(components) == 2:
                    if components[0][:3] == components[1][:3]:
                        if components[0] == "popproj":
                            name = components[0].upper()
                        else:
                            name = components[1].upper()
                    else:
                        if components[0] == "dec":
                            components[0] = "decennial"
                        if components[1] == "pl":
                            components[1] = "PUBLIC_LAW_94_171"
                        name = "_".join(components).upper()
                elif len(components) == 3 and components[2] in self.MONTHS:
                    # This is the case for monthly data sets.
                    name = "_".join(components).upper()
                else:
                    if components[0][:3] == components[1][:3]:
                        name = "_".join(components[1:]).upper()
                    else:
                        name = "_".join(components[:2]).upper()
                item, link = f'"{item}"', f'"{link}"'
                self.dictionary[name] = [item, link]
        return self.dictionary
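
    # A few examples of how the naming rules above behave (the dataset names
    # shown are illustrative; the authoritative list comes from the Census API):
    #
    #     "acs/acs5"      -> ACS5                          (repeated prefix collapsed)
    #     "dec/pl"        -> DECENNIAL_PUBLIC_LAW_94_171   (special-cased components)
    #     "cps/basic/jan" -> CPS_BASIC_JAN                 (monthly data set)
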
    def write_file(self, destination_file: str):  # pragma: no cover
        """
        Write the symbolic names dictionary contents into the destination file.

        Parameters
        ----------
        destination_file
            The target file for storing the datasets' symbolic names.
        """
        with open(destination_file, "w") as destfile:
            destfile.write(self.module_message)
            destfile.write("\n")

            for key in sorted(self.dictionary.keys()):
                destfile.write("\n")
                content = key + " = " + self.dictionary[key][0]
                destfile.write(content)
                destfile.write("\n")

            destfile.write("\n")
            destfile.write("\nDATASET_REFERENCE_URLS = {\n")

            for key in sorted(self.dictionary.keys()):
                value = self.dictionary[key][1]
                destfile.write("    %s: %s,\n" % (key, value))
            destfile.write("}\n")

            # Write LODES
            destfile.write(
                """
# LODES are special data sets with their own base URL.

"""
            )

            lodes_symbols = []

            # LODES OD
            for part in ["main", "aux"]:
                for job_type in [f"JT0{ii}" for ii in range(6)]:
                    symbol = f"LODES_OD_{part.upper()}_{job_type}"
                    destfile.write(
                        f'{symbol} = "lodes/od/{part}/{job_type.lower()}"\n\n'
                    )
                    lodes_symbols.append(symbol)

            # LODES RAC and WAC
            for dataset_kind in ["rac", "wac"]:
                for segment in [
                    "S000",
                    "SA01",
                    "SA02",
                    "SA03",
                    "SE01",
                    "SE02",
                    "SE03",
                    "SI01",
                    "SI02",
                    "SI03",
                ]:
                    for job_type in [f"JT0{ii}" for ii in range(6)]:
                        symbol = (
                            f"LODES_{dataset_kind.upper()}_{segment.upper()}_{job_type}"
                        )
                        destfile.write(
                            f'{symbol} = "lodes/{dataset_kind}/{segment.lower()}/{job_type.lower()}"\n\n'
                        )
                        lodes_symbols.append(symbol)

            destfile.write("\nALL_LODES_DATA_SETS = [\n")
            for symbol in lodes_symbols:
                destfile.write(f"    {symbol},\n")
            destfile.write("]\n")
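
    # The generated module written above looks roughly like this (an
    # illustrative excerpt, not exact output):
    #
    #     ACS5 = "acs/acs5"
    #     ...
    #     DATASET_REFERENCE_URLS = {
    #         ACS5: "https://api.census.gov/data/2021/acs/acs5",
    #         ...
    #     }
    #     LODES_OD_MAIN_JT00 = "lodes/od/main/jt00"
    #     ...
    #     ALL_LODES_DATA_SETS = [
    #         LODES_OD_MAIN_JT00,
    #         ...
    #     ]

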
def main():  # pragma: no cover
    """Generate a new version of datasets.py."""
    df_datasets = ced.variables.all_data_sets()
    dataset_names = df_datasets["DATASET"].to_list()
    dataset_url = df_datasets["API BASE URL"].to_list()
    create_symbolic = symbolic()
    create_symbolic.store_dataset(dataset_names, dataset_url)

    parser = argparse.ArgumentParser(description="Get destination file name.")
    parser.add_argument(
        "filename",
        metavar="filename",
        type=str,
        help="a file name for the symbolic name destination file",
    )
    args = parser.parse_args()

    path_directory = "censusdis/"
    target_directory = Path(path_directory, args.filename)
    create_symbolic.write_file(target_directory)

    print("Generated " + args.filename + " file successfully.")


if __name__ == "__main__":
    main()
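
# A minimal usage sketch. The file name "datasets.py" matches the docstring of
# main(); the example dataset name and URL below are assumptions for illustration.
# main() writes into the censusdis/ directory, so run it from the repository root:
#
#     python utils/symbolic.py datasets.py
#
# Programmatic use follows the same steps main() performs:
#
#     gen = symbolic()
#     gen.store_dataset(["acs/acs5"], ["https://api.census.gov/data/2021/acs/acs5"])
#     gen.write_file("censusdis/datasets.py")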