Coverage for censusdis/cli/yamlspec.py: 91%
306 statements
« prev ^ index » next coverage.py v6.5.0, created at 2025-04-03 05:39 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2025-04-03 05:39 +0000
1# Copyright (c) 2023 Darren Erik Vengroff
2"""Classes that are loaded from YAML config files for the CLI."""
3from abc import ABC
4import itertools
5from importlib import import_module
6from pathlib import Path
7from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, ClassVar
9import geopandas as gpd
10import pandas as pd
11import yaml
13from matplotlib.ticker import StrMethodFormatter
15import censusdis.data as ced
16import censusdis.maps as cem
17import censusdis.datasets
18import censusdis.states
19from censusdis.geography import InSpecType
20from censusdis.impl.varsource.base import VintageType
23def _class_constructor(clazz: ClassVar):
24 def constructor(
25 loader: yaml.SafeLoader, node: yaml.nodes.MappingNode
26 ) -> VariableSpec:
27 """Construct a new object of the given class."""
28 kwargs = loader.construct_mapping(node, deep=True)
29 return clazz(**kwargs)
31 return constructor
class VariableSpec(ABC):
    """
    Abstract base class for specification of variables to download from the U.S. Census API.

    Parameters
    ----------
    denominator
        The denominator to divide by when constructing fractional variables.
        If `False` then no fractional variables are added. If the name of a
        variable, that variable will be downloaded and used as a denominator
        to compute fractional versions of all of the other variables. If `True`
        then the denominator will be computed as the sum of all the other
        variables.
    frac_prefix
        The prefix to prepend to fractional variables. If `None` a default
        prefix of `'frac_'` is used.
    frac_not
        If `True`, use `1 - fraction` instead of the fraction when
        fractional variables are constructed.
    """

    def __init__(
        self,
        *,
        denominator: Union[str, bool] = False,
        frac_prefix: Optional[str] = None,
        frac_not: bool = False,
    ):
        self._denominator = denominator
        self._frac_prefix = "frac_" if frac_prefix is None else frac_prefix
        self._frac_not = frac_not

    @property
    def denominator(self) -> Union[str, bool]:
        """The denominator to divide by when constructing fractional variables."""
        return self._denominator

    @property
    def frac_prefix(self) -> str:
        """The prefix to prepend to fractional variables."""
        return self._frac_prefix

    @property
    def frac_not(self) -> bool:
        """Should we return 1 - fraction instead of fraction."""
        return self._frac_not

    def variables_to_download(self) -> List[str]:
        """Return a list of the variables that need to be downloaded from the U.S. Census API."""
        # Only a named denominator requires an extra download.
        if isinstance(self._denominator, str):
            return [self._denominator]

        return []

    def groups_to_download(self) -> List[Tuple[str, bool]]:
        """
        Return the names of groups of variables that need to be downloaded from the U.S. Census API.

        Returns
        -------
            A list of `(group, leaves_only)` pairs. The base class has
            no groups, so the list is empty.
        """
        return []

    def synthesize(self, df_downloaded: Union[pd.DataFrame, gpd.GeoDataFrame]) -> None:
        """
        Post-process after downloading to construct synthesized variables such as fractional variables.

        The base class synthesizes nothing; subclasses override this.

        Parameters
        ----------
        df_downloaded
            A data frame of variables that were downloaded. Any synthesized variables
            are added as new columns.

        Returns
        -------
            None. Any additions are made in-place in `df_downloaded`.
        """
        return None

    def download(
        self,
        dataset: str,
        vintage: VintageType,
        *,
        set_to_nan: Union[bool, Iterable[int]] = True,
        skip_annotations: bool = True,
        with_geometry: bool = False,
        contained_within: Optional[ced.ContainedWithin] = None,
        remove_water: bool = False,
        api_key: Optional[str] = None,
        row_keys: Optional[Union[str, Iterable[str]]] = None,
        **kwargs: InSpecType,
    ) -> Union[pd.DataFrame, gpd.GeoDataFrame]:
        """
        Download the variables we need from the U.S. Census API.

        Most of the optional parameters here mirror those in
        :py:func:`~ced.download`.

        Parameters
        ----------
        dataset
            The dataset to download from. For example `"acs/acs5"`,
            `"dec/pl"`, or `"timeseries/poverty/saipe/schdist"`. There are
            symbolic names for datasets, like `ACS5` for `"acs/acs5"`
            in :py:module:`censusdis.datasets`.
        vintage
            The vintage to download data for. For most data sets this is
            an integer year, for example, `2020`. But for
            a timeseries data set, pass the string `'timeseries'`.
        set_to_nan
            A list of values that should be set to NaN. Normally these are special
            values that the U.S. Census API sometimes returns. If `True`, then all
            values in :py:ref:`censusdis.values.ALL_SPECIAL_VALUES` will be replaced.
            If `False`, no replacements will be made.
        skip_annotations
            If `True` try to filter out `group` or `leaves_of_group` variables that are
            annotations rather than actual values. See :py:meth:`VariableCache.group_variables`
            for more details. Variable names passed in `download_variables` are not
            affected by this flag.
        with_geometry
            If `True` a :py:class:`gpd.GeoDataFrame` will be returned and each row
            will have a geometry that is a cartographic boundary suitable for plotting
            a map. See https://www.census.gov/geographies/mapping-files/time-series/geo/cartographic-boundary.2020.html
            for details of the shapefiles that will be downloaded on your behalf to
            generate these boundaries.
        contained_within
            An optional :py:class:`~ced.ContainedWithin` if we want to download
            geometries contained within others.
        remove_water
            If `True` and if with_geometry=True, will query TIGER for AREAWATER shapefiles and
            remove water areas from returned geometry.
        api_key
            An optional API key. If you don't have or don't use a key, the number
            of calls you can make will be limited to 500 per day.
        row_keys
            An optional set of identifier keys to help merge together requests for more than the census API limit of
            50 variables per query. These keys are useful for census datasets such as the Current Population Survey
            where the geographic identifiers do not uniquely identify each row.
        kwargs
            A specification of the geometry that we want data for. For example,
            `state = "*", county = "*"` will download county-level data for
            the entire US.

        Returns
        -------
            A :py:class:`~pd.DataFrame` or `~gpd.GeoDataFrame` containing the requested US Census data.
        """
        group_list = self.groups_to_download()

        # Split the groups by whether only their leaves are wanted. An
        # empty list is passed on as None.
        groups = [group for group, leaves_only in group_list if not leaves_only] or None
        leaves_of_groups = [
            group for group, leaves_only in group_list if leaves_only
        ] or None

        # Our download might be scoped to be contained
        # within some other geometries.
        download_scope = contained_within if contained_within else ced

        df_or_gdf = download_scope.download(
            dataset=dataset,
            vintage=vintage,
            download_variables=self.variables_to_download(),
            group=groups,
            leaves_of_group=leaves_of_groups,
            set_to_nan=set_to_nan,
            skip_annotations=skip_annotations,
            with_geometry=with_geometry,
            remove_water=remove_water,
            api_key=api_key,
            row_keys=row_keys,
            **kwargs,
        )

        # Synthesized columns are added in place.
        self.synthesize(df_or_gdf)

        return df_or_gdf

    @classmethod
    def _yaml_loader(cls):
        """Return the YAML loader class with the variable spec tags registered."""
        # NOTE(review): this registers the tags on `yaml.SafeLoader` itself
        # rather than on a private subclass, so the registration is visible
        # to every other user of `yaml.SafeLoader` in the process. Left as
        # is because other loaders in this file build on the same class.
        loader = yaml.SafeLoader
        loader.add_constructor("!VariableList", _class_constructor(VariableList))
        loader.add_constructor("!Group", _class_constructor(CensusGroup))
        loader.add_constructor("!SpecCollection", _variable_spec_collection_constructor)
        return loader

    @classmethod
    def load_yaml(cls, path: Union[str, Path]):
        """
        Load a YAML file containing a `VariableSpec`.

        Parameters
        ----------
        path
            Path of the YAML file to read.

        Returns
        -------
            Whatever object the YAML document constructs.
        """
        loader = cls._yaml_loader()

        # Use a context manager so the file handle is always closed.
        with open(path, "rb") as yaml_file:
            return yaml.load(yaml_file, Loader=loader)
class VariableList(VariableSpec):
    """
    Specification of a list of variables to download from the U.S. Census API.

    Parameters
    ----------
    variables
        The variables to download. A single name or an iterable of names.
    denominator
        The denominator to divide by when constructing fractional variables.
        If `False` then no fractional variables are added. If the name of a
        variable, that variable will be downloaded and used as a denominator
        to compute fractional versions of all of the other variables. If `True`
        then the denominator will be computed as the sum of all the other
        variables.
    frac_prefix
        The prefix to prepend to fractional variables. If `None` a default
        prefix of `'frac_'` is used.
    frac_not
        If `True`, fractional variables hold `1 - fraction` instead of
        the fraction.
    """

    def __init__(
        self,
        variables: Union[str, Iterable[str]],
        *,
        denominator: Union[str, bool] = False,
        frac_prefix: Optional[str] = None,
        frac_not: Optional[bool] = False,
    ):
        super().__init__(
            denominator=denominator, frac_prefix=frac_prefix, frac_not=frac_not
        )
        # A bare string is one variable name, not an iterable of characters.
        if isinstance(variables, str):
            self._variables = [variables]
        else:
            self._variables = list(variables)

    def variables_to_download(self) -> List[str]:
        """
        Return a list of the variables that need to be downloaded from the U.S. Census API.

        This consists of the variables passed at construction time, and a denominator
        variable if one was specified.
        """
        # Always hand back a fresh list so callers cannot mutate our state.
        to_download = list(self._variables)

        if (
            isinstance(self.denominator, str)
            and self.denominator not in self._variables
        ):
            # We specified a specific denominator that was not already
            # one of the variables, so get it.
            to_download.append(self.denominator)

        return to_download

    def synthesize(self, df_downloaded: Union[pd.DataFrame, gpd.GeoDataFrame]) -> None:
        """
        Post-process after downloading to construct synthesized variables.

        This is where fractional variables are generated.

        Parameters
        ----------
        df_downloaded
            A data frame of variables that were downloaded. Any synthesized variables
            are added as new columns.

        Returns
        -------
            None. Any additions are made in-place in `df_downloaded`.
        """
        if not self.denominator:
            # No fractional variables were requested.
            return

        if isinstance(self.denominator, str):
            # A named variable is the denominator.
            denominator = df_downloaded[self.denominator]
        else:
            # Otherwise the denominator is the row-wise sum of our variables.
            denominator = df_downloaded[self._variables].sum(axis="columns")

        for variable in self._variables:
            frac = df_downloaded[variable] / denominator
            df_downloaded[f"{self.frac_prefix}{variable}"] = (
                1.0 - frac if self.frac_not else frac
            )

    def __eq__(self, other) -> bool:
        """
        Are two `VariableList`'s equal.

        Only the variables (in any order) and the denominator are
        compared; `frac_prefix` and `frac_not` are not.
        """
        if not isinstance(other, VariableList):
            return False

        return (
            sorted(self._variables) == sorted(other._variables)
            and self.denominator == other.denominator
        )
class CensusGroup(VariableSpec):
    """
    Specification of a group of variables to download from the U.S. Census API.

    Parameters
    ----------
    group
        The name of a census group, such as `B03002`, or a list of several
        such groups.
    leaves_only
        If `True`, then only download the variables that are at the leaves of
        the group, not the internal variables.
    denominator
        The denominator to divide by when constructing fractional variables.
        If `False` or `None` then no fractional variables are added. If the name of a
        variable, that variable will be downloaded and used as a denominator
        to compute fractional versions of all of the other variables. If `True`
        then the denominator will be computed as the sum of all the other
        variables.
    frac_prefix
        The prefix to prepend to fractional variables. If `None` a default
        prefix of `'frac_'` is used.
    frac_not
        If `True`, fractional variables hold `1 - fraction` instead of
        the fraction.
    """

    def __init__(
        self,
        group: Union[str, Iterable[str]],
        *,
        leaves_only: bool = False,
        denominator: Optional[Union[str, bool]] = None,
        frac_prefix: Optional[str] = None,
        frac_not: bool = False,
    ):
        # None is accepted as a synonym for "no denominator".
        if denominator is None:
            denominator = False

        super().__init__(
            denominator=denominator, frac_prefix=frac_prefix, frac_not=frac_not
        )
        self._group = [group] if isinstance(group, str) else list(group)
        self._leaves_only = leaves_only

    def groups_to_download(self) -> List[Tuple[str, bool]]:
        """
        Return the names of groups of variables that need to be downloaded from the U.S. Census API.

        The returned values are simply the groups specified at construction time.

        Returns
        -------
            A list of `(group, leaves_only)` pairs, one per group.
        """
        return [(group, self._leaves_only) for group in self._group]

    def synthesize(self, df_downloaded: Union[pd.DataFrame, gpd.GeoDataFrame]) -> None:
        """
        Post-process after downloading to construct synthesized variables.

        This is where fractional variables are generated. A column belongs
        to a group when its name starts with the group name.

        Parameters
        ----------
        df_downloaded
            A data frame of variables that were downloaded. Any synthesized variables
            are added as new columns.

        Returns
        -------
            None. Any additions are made in-place in `df_downloaded`.
        """
        named_denominator = isinstance(self.denominator, str)

        if not named_denominator and not self.denominator:
            # No fractional variables were requested.
            return

        # Snapshot the downloaded columns once so that columns we add for
        # one group are never picked up while processing a later group.
        # Non-string labels cannot belong to a group, so skip them.
        downloaded_columns = [
            column for column in df_downloaded.columns if isinstance(column, str)
        ]

        for group in self._group:
            group_columns = [
                column for column in downloaded_columns if column.startswith(group)
            ]
            if not group_columns:
                continue

            if named_denominator:
                denominator = df_downloaded[self.denominator]
            else:
                # The denominator is the row-wise sum over the group.
                denominator = df_downloaded[group_columns].sum(axis="columns")

            for variable in group_columns:
                frac = df_downloaded[variable] / denominator
                df_downloaded[f"{self.frac_prefix}{variable}"] = (
                    1.0 - frac if self.frac_not else frac
                )

    def __eq__(self, other) -> bool:
        """
        Are two `CensusGroup`'s equal.

        The groups (in any order), the denominator and `leaves_only` are
        compared; `frac_prefix` and `frac_not` are not.
        """
        if not isinstance(other, CensusGroup):
            return False

        return (
            sorted(self._group) == sorted(other._group)
            and self.denominator == other.denominator
            and self._leaves_only == other._leaves_only
        )
class VariableSpecCollection(VariableSpec):
    """
    Specification built on top of a collection of other :py:class:`~VariableSpec`s.

    When downloading, all the groups and all the variables
    specified in any of the constituent specs will be
    downloaded.

    Parameters
    ----------
    variable_specs
        A collection of other :py:class:`~VariableSpec`s.
    """

    def __init__(self, variable_specs: Iterable[VariableSpec]):
        # The collection itself has no denominator; each constituent
        # spec carries its own.
        super().__init__(denominator=None)
        self._variable_specs = list(variable_specs)

    def variables_to_download(self) -> List[str]:
        """
        Return a list of the variables that need to be downloaded from the U.S. Census API.

        Returns all the variables to be downloaded by the :py:class:`~VariableSpec`'s
        in the collection, without duplicates, in first-seen order.
        """
        all_variables = itertools.chain.from_iterable(
            spec.variables_to_download() for spec in self._variable_specs
        )
        # dict.fromkeys de-duplicates while keeping a deterministic order.
        return list(dict.fromkeys(all_variables))

    def groups_to_download(self) -> List[Tuple[str, bool]]:
        """
        Return the names of groups of variables that need to be downloaded from the U.S. Census API.

        The result is a list of the unique groups returned by all the :py:class:`~VariableSpec`'s
        given at construction time, in first-seen order.

        Returns
        -------
            A list of unique `(group, leaves_only)` pairs.
        """
        all_groups = itertools.chain.from_iterable(
            spec.groups_to_download() for spec in self._variable_specs
        )
        return list(dict.fromkeys(all_groups))

    def synthesize(self, df_downloaded: Union[pd.DataFrame, gpd.GeoDataFrame]) -> None:
        """
        Post-process after downloading to construct synthesized variables.

        We do this by calling `synthesize` on each of our constituent variable specifications.

        Parameters
        ----------
        df_downloaded
            A data frame of variables that were downloaded. Any synthesized variables
            are added as new columns.

        Returns
        -------
            None. Any additions are made in-place in `df_downloaded`.
        """
        for spec in self._variable_specs:
            spec.synthesize(df_downloaded)

    def __eq__(self, other) -> bool:
        """
        Are two `VariableSpecCollection`s equal.

        Two collections are equal when they hold the same number of specs
        and every spec in one can be paired with a distinct equal spec in
        the other, regardless of order.
        """
        if not isinstance(other, VariableSpecCollection):
            return False

        if len(self._variable_specs) != len(other._variable_specs):
            return False

        # Indices into other's specs that have already been paired, so
        # no spec in other is matched twice.
        matched = set()

        # Does every spec in self have a unique match in other?
        for self_spec in self._variable_specs:
            for ii, other_spec in enumerate(other._variable_specs):
                if ii not in matched and self_spec == other_spec:
                    matched.add(ii)
                    break
            else:
                # The inner loop found no unmatched equal spec.
                return False

        return True
def _variable_spec_collection_constructor(
    loader: yaml.SafeLoader, node: yaml.nodes.SequenceNode
) -> VariableSpecCollection:
    """
    Construct a variable spec collection from a YAML sequence node.

    The sequence in `node` is constructed with `deep=True` and the
    resulting list is wrapped in a :py:class:`~VariableSpecCollection`.
    """
    return VariableSpecCollection(loader.construct_sequence(node, deep=True))
class DataSpec:
    """
    A specification for what data we want from the U.S. Census API.

    In order to download data we must know the data set and vintage
    and have one or more :py:class:`~VariableSpec`s that tell us
    what variables we need and what synthetic variables to create,
    for example fractional variables.

    Parameters
    ----------
    dataset
        The dataset to download from. For example `"acs/acs5"`,
        `"dec/pl"`, or `"timeseries/poverty/saipe/schdist"`. There are
        symbolic names for datasets, like `ACS5` for `"acs/acs5"`
        in :py:module:`censusdis.datasets`.
    vintage
        The vintage to download data for. For most data sets this is
        an integer year, for example, `2020`.
    specs
        A single :py:class:`~VariableSpec` or an iterable of them. An
        iterable is wrapped in a :py:class:`~VariableSpecCollection`.
    geography
        A specification of the geography, for example `{'state': '*'}`
        for all states or `{'state': censusdis.states.NJ, 'county': '*'}`
        for all counties in New Jersey.
    contained_within
        An optional specification for the geometry the results should be
        contained within. For example, we could select a CBSA here and
        put wildcards for state and county in `geography` to get all counties
        contained within the CBSA. We need this in cases like this because
        CBSAs are off-spine while states and counties are on-spine.
    area_threshold
        How much of the area of a geometry must be contained in an outer
        geometry for it to be included.
    with_geometry
        If `True` a :py:class:`gpd.GeoDataFrame` will be returned and each row
        will have a geometry that is a cartographic boundary suitable for plotting
        a map. See https://www.census.gov/geographies/mapping-files/time-series/geo/cartographic-boundary.2020.html
        for details of the shapefiles that will be downloaded on your behalf to
        generate these boundaries.
    remove_water
        If `True` and if with_geometry=True, will query TIGER for AREAWATER shapefiles and
        remove water areas from returned geometry.
    """

    def __init__(
        self,
        dataset: str,
        vintage: VintageType,
        specs: Union[VariableSpec, Iterable[VariableSpec]],
        geography: Dict[str, Union[str, List[str]]],
        *,
        contained_within: Optional[Dict[str, Union[str, List[str]]]] = None,
        area_threshold: float = 0.8,
        with_geometry: bool = False,
        remove_water: bool = False,
    ):
        # Map symbolic names or use what we are given if there is no mapping.
        self._dataset = getattr(censusdis.datasets, dataset, dataset)
        self._vintage = vintage
        # If it is a raw list construct a collection around it.
        self._variable_spec = (
            specs if isinstance(specs, VariableSpec) else VariableSpecCollection(specs)
        )
        self._geography = self.map_state_and_county_names(geography)

        if contained_within is None:
            self._contained_within = None
        else:
            contained_within = self.map_state_and_county_names(contained_within)
            self._contained_within = ced.ContainedWithin(
                area_threshold, **contained_within
            )

        self._with_geometry = with_geometry
        self._remove_water = remove_water

    @classmethod
    def map_state_and_county_names(
        cls, geography: Dict[str, Union[str, List[str]]]
    ) -> Dict[str, Union[str, List[str]]]:
        """
        If there is a state and optionally counties in a geography, try to map them.

        Symbolic names are looked up as attributes of `censusdis.states`
        and of the state's module under `censusdis.counties`; names with
        no such attribute are kept unchanged.

        Parameters
        ----------
        geography
            The geography specification. It is not modified.

        Returns
        -------
            The original dict if it has no `'state'` key, otherwise a
            copy with the state and county names mapped.
        """

        def map_state(state: str) -> str:
            """Map the name if a symbolic name exists."""
            return getattr(censusdis.states, state, state)

        def _map_county(state: str):
            """Construct a function to map counties in a state."""
            # The county module name is the lower-cased state name with
            # spaces replaced by underscores.
            state_symbol = (
                censusdis.states.NAMES_FROM_IDS[state].lower().replace(" ", "_")
            )

            state_county_module = import_module(f"censusdis.counties.{state_symbol}")

            def map_county(county: str):
                """Map a county in the given state."""
                return getattr(state_county_module, county, county)

            return map_county

        # If there is no 'state' in geography there is nothing to do.
        # If there is a 'state', we copy the dict and do the mapping.
        if "state" in geography:
            geography = dict(geography)

            # We might need to map the symbol.
            if isinstance(geography["state"], str):
                geography["state"] = map_state(geography["state"])

            # Checked again after mapping, since the mapped value is what
            # decides which branch applies.
            # NOTE(review): presumably a symbol can resolve to a list of
            # states; confirm against censusdis.states.
            if isinstance(geography["state"], str):
                # There is a single state, so there might be counties
                # underneath it that need mapping.
                if "county" in geography and geography["state"] != "*":
                    map_county = _map_county(geography["state"])
                    if isinstance(geography["county"], str):
                        geography["county"] = map_county(geography["county"])
                    else:
                        geography["county"] = [
                            map_county(county) for county in geography["county"]
                        ]
            else:
                geography["state"] = [map_state(state) for state in geography["state"]]

        return geography

    @property
    def dataset(self) -> str:
        """What data set to query."""
        return self._dataset

    @property
    def vintage(self) -> VintageType:
        """What vintage."""
        return self._vintage

    @property
    def with_geometry(self) -> bool:
        """Do we want to download geometry as well as data so we can plot maps."""
        return self._with_geometry

    @property
    def remove_water(self) -> bool:
        """Should we improve the geometry by masking off water."""
        return self._remove_water

    @property
    def variable_spec(self) -> VariableSpec:
        """The specification of variables to download."""
        return self._variable_spec

    @property
    def geography(self) -> Dict[str, Union[str, List[str]]]:
        """What geography to download data for."""
        return self._geography

    @property
    def contained_within(self) -> Union[None, ced.ContainedWithin]:
        """What geometry are we contained within."""
        return self._contained_within

    def download(
        self,
        api_key: Optional[str] = None,
    ) -> Union[pd.DataFrame, gpd.GeoDataFrame]:
        """
        Download the data we want from the U.S. Census API.

        Parameters
        ----------
        api_key
            An optional API key. If you don't have or don't use a key, the number
            of calls you can make will be limited to 500 per day.

        Returns
        -------
            A :py:class:`~pd.DataFrame` or `~gpd.GeoDataFrame` containing the requested US Census data.
        """
        # The variable spec does the download; the geography is passed
        # as keyword arguments.
        return self._variable_spec.download(
            dataset=self._dataset,
            vintage=self._vintage,
            with_geometry=self._with_geometry,
            contained_within=self._contained_within,
            remove_water=self._remove_water,
            api_key=api_key,
            **self._geography,
        )

    @classmethod
    def _yaml_loader(cls):
        """Return the variable spec loader with the `!DataSpec` tag also registered."""
        loader = VariableSpec._yaml_loader()
        loader.add_constructor("!DataSpec", _class_constructor(cls))
        return loader

    @classmethod
    def load_yaml(cls, path: Union[str, Path]):
        """
        Load a YAML file containing a `DataSpec`.

        Parameters
        ----------
        path
            Path of the YAML file to read.

        Returns
        -------
            Whatever object the YAML document constructs.
        """
        loader = cls._yaml_loader()

        # Use a context manager so the file handle is always closed.
        with open(path, "rb") as yaml_file:
            return yaml.load(yaml_file, Loader=loader)
class PlotSpec:
    """
    A specification for how to plot data we downloaded.

    Parameters
    ----------
    variable
        What variable to plot. Specify this to shade geographies
        based on the value of the variable. Leave out and set `boundary=True`
        to plot boundaries instead.
    boundary
        Should we plot boundaries instead of filled geographies?
        If `True`, `variable` should not be specified.
    title
        A title for the plot.
    with_background
        If `True`, plot over a background map.
    legend
        If `True` and plotting a variable (not a boundary) then add a legend.
    legend_format
        How to format the numbers on the legend. The options are
        `"float"`, `"int"`, `"dollar"`, `"percent"`, or a format string like `"${x:.2f}"`
        to choose any Python string format you want.
    projection
        What projection to use. `"US"` means move AK, HI, and PR. `None`
        is treated as `"US"`. Anything else is interpreted as an EPSG.
    plot_kwargs
        Additional keyword args for matplotlib to use in plotting.

    Raises
    ------
    ValueError
        If neither or both of `variable` and `boundary=True` are given.
    """

    def __init__(
        self,
        *,
        variable: Optional[str] = None,
        boundary: bool = False,
        title: Optional[str] = None,
        with_background: bool = False,
        plot_kwargs: Optional[Dict[str, Any]] = None,
        projection: Optional[str] = None,
        legend: bool = True,
        legend_format: Optional[str] = None,
    ):
        if variable is None and not boundary:
            raise ValueError("Must specify either `variable=` or `boundary=True`")
        if variable is not None and boundary:
            raise ValueError("Must specify only one of `variable=` or `boundary=True`")

        if projection is None:
            projection = "US"

        if plot_kwargs is None:
            plot_kwargs = {}

        self._variable = variable
        self._boundary = boundary
        self._title = title
        self._legend = legend
        self._legend_format = legend_format
        self._with_background = with_background
        self._plot_kwargs = plot_kwargs
        self._projection = projection

    @property
    def variable(self) -> Union[str, None]:
        """What variable will we plot."""
        return self._variable

    @property
    def boundary(self) -> bool:
        """Should we plot boundaries instead of a variable."""
        return self._boundary

    @property
    def with_background(self) -> bool:
        """Should we plot a background map from Open Street Maps."""
        return self._with_background

    @property
    def plot_kwargs(self) -> Dict[str, Any]:
        """
        Additional keyword args to control the plot.

        e.g. `{'figsize': [12, 8]}` to change the default size of the plot.
        """
        return self._plot_kwargs

    @property
    def title(self):
        """The plot title."""
        return self._title

    @property
    def legend(self):
        """Is there a legend."""
        return self._legend

    @property
    def legend_format(self):
        """Format for the legend numbers."""
        return self._legend_format

    @property
    def projection(self):
        """What projection to use when plotting."""
        return self._projection

    def __eq__(self, other) -> bool:
        """Are two `PlotSpec`'s equal."""
        if not isinstance(other, PlotSpec):
            return False

        return (
            self._variable == other._variable
            and self._boundary == other._boundary
            and self._with_background == other._with_background
            and self._projection == other._projection
            and self._title == other._title
            and self._legend == other._legend
            and self._legend_format == other._legend_format
            and self._plot_kwargs == other._plot_kwargs
        )

    # Named legend formats: name -> (format string, scale factor applied
    # to the plotted values).
    _LEGEND_FORMATS: Dict[str, Tuple[str, float]] = {
        "dollar": ("${x:,.0f}", 1.0),
        "int": ("{x:,.0f}", 1.0),
        "float": ("{x:,}", 1.0),
        "percent": ("{x:.0f}%", 100),
    }

    def _final_legend_format(self):
        """
        Resolve the legend format into a `(format string, scale)` pair.

        Named formats come from `_LEGEND_FORMATS`; anything else is used
        as the format string itself with a scale of 1.0.
        """
        return self._LEGEND_FORMATS.get(self._legend_format, (self._legend_format, 1.0))

    def plot(self, gdf: gpd.GeoDataFrame, ax=None):
        """
        Plot data on a map according to the specification.

        The caller's `gdf` is not modified; the scaled helper column
        used for plotting is added to a copy.

        Parameters
        ----------
        gdf
            The data to plot.
        ax
            Optional existing ax to plot on top of.

        Returns
        -------
            `ax` of the plot.
        """
        final_legend_format, legend_scale = self._final_legend_format()

        # Only format the legend when there is a legend and a format was given.
        legend_kwds = (
            None
            if self._boundary or not self._legend or self._legend_format is None
            else {"format": StrMethodFormatter(final_legend_format)}
        )

        if self._projection in ["US", "us", "U.S."]:
            if self._boundary:
                ax = cem.plot_us_boundary(
                    gdf,
                    self._variable,
                    with_background=self._with_background,
                    do_relocate_ak_hi_pr=True,
                    ax=ax,
                    **self._plot_kwargs,
                )
            else:
                # Work on a copy so the helper column does not leak
                # into the caller's data frame.
                gdf = gdf.copy()
                gdf["_scaled_var"] = gdf[self._variable] * legend_scale
                ax = cem.plot_us(
                    gdf,
                    "_scaled_var",
                    with_background=self._with_background,
                    do_relocate_ak_hi_pr=True,
                    legend=self._legend,
                    legend_kwds=legend_kwds,
                    ax=ax,
                    **self._plot_kwargs,
                )
        else:
            # The reprojected frame is bound to a local name and the
            # helper column is added to it.
            gdf = gdf.to_crs(epsg=self._projection)

            if self._boundary:
                gdf = gdf.boundary
            else:
                gdf["_scaled_var"] = gdf[self._variable] * legend_scale

            ax = cem.plot_map(
                gdf,
                self._variable if self._boundary else "_scaled_var",
                with_background=self._with_background,
                legend=self._legend and not self._boundary,
                legend_kwds=legend_kwds,
                ax=ax,
                **self._plot_kwargs,
            )

        if self._title is not None:
            ax.set_title(self._title)

        return ax

    @classmethod
    def _yaml_loader(cls):
        """Return the YAML loader class with the `!PlotSpec` tag registered."""
        # NOTE(review): the tag is registered on `yaml.SafeLoader` itself,
        # so it is visible to every other user of that class.
        loader = yaml.SafeLoader
        loader.add_constructor("!PlotSpec", _class_constructor(cls))
        return loader

    @classmethod
    def load_yaml(cls, path: Union[str, Path]) -> "PlotSpec":
        """
        Load a YAML file containing a `PlotSpec`.

        Parameters
        ----------
        path
            Path of the YAML file to read.

        Returns
        -------
            Whatever object the YAML document constructs.
        """
        loader = cls._yaml_loader()

        # Use a context manager so the file handle is always closed.
        with open(path, "rb") as yaml_file:
            return yaml.load(yaml_file, Loader=loader)