Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
4d04b19
add popsafe county -> fips and state data files
nmdefries Feb 16, 2023
b3a03d4
construct class attributes using CROSSWALK keys
nmdefries Feb 16, 2023
0284c28
support popsafe-fips in geomap.py; get_geos_within to take "fips"
nmdefries Feb 16, 2023
1f8a21b
combine fips and popsafe-fips get_geo_within logic; comments
nmdefries Feb 20, 2023
fb82be4
code to generate popsafe tables
nmdefries Feb 20, 2023
f284b91
document group assignment procedure
nmdefries Feb 21, 2023
ba62241
add lowpop county group mapping file
nmdefries Feb 21, 2023
a1c491c
test popsafe-fips
nmdefries Feb 20, 2023
f74aefd
linting
nmdefries Feb 21, 2023
1a1bba7
change code name
nmdefries Mar 2, 2023
bb7df1a
generate local county mapping from CHNG spreadsheet
nmdefries Mar 3, 2023
be8da31
remove "population" wording in relation to chng-fips
nmdefries Mar 3, 2023
54faa45
rename local county groups csv to not mention population
nmdefries Mar 3, 2023
18a08a5
drop split-out county fields; don't always exist in input
nmdefries Mar 13, 2023
0ef6b13
split out single counties from fips_list field
nmdefries Mar 13, 2023
f9ed4d4
raise error if new groups seen
nmdefries Mar 13, 2023
4fa39b7
state_fips and group are already str; don't need to cast
nmdefries Mar 13, 2023
4b9e836
Merge branch 'ndefries/geomapper/popsafe-county-level' into ndefries/…
nmdefries Mar 13, 2023
dc02bb5
make error more actionable
nmdefries Mar 13, 2023
f785b22
Merge pull request #1803 from cmu-delphi/ndefries/geomapper/chng-fips…
korlaxxalrok Mar 14, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
402 changes: 402 additions & 0 deletions _delphi_utils_python/data_proc/geomap/chng_county_groups.csv

Large diffs are not rendered by default.

175 changes: 175 additions & 0 deletions _delphi_utils_python/data_proc/geomap/geo_data_proc.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,18 @@
FIPS_PUERTO_RICO_POPULATION_URL = "https://www2.census.gov/geo/docs/maps-data/data/rel/zcta_county_rel_10.txt?"
STATE_HHS_FILE = "hhs.txt"
ZIP_POP_MISSING_FILE = "zip_pop_filling.csv"
CHNG_COUNTY_GROUPS_FILE = "chng_county_groups.csv"

# Out files
FIPS_STATE_OUT_FILENAME = "fips_state_table.csv"
FIPS_MSA_OUT_FILENAME = "fips_msa_table.csv"
FIPS_HRR_OUT_FILENAME = "fips_hrr_table.csv"
FIPS_ZIP_OUT_FILENAME = "fips_zip_table.csv"
FIPS_HHS_FILENAME = "fips_hhs_table.csv"
FIPS_CHNGFIPS_OUT_FILENAME = "fips_chng-fips_table.csv"
FIPS_POPULATION_OUT_FILENAME = "fips_pop.csv"

CHNGFIPS_STATE_OUT_FILENAME = "chng-fips_state_table.csv"
ZIP_HSA_OUT_FILENAME = "zip_hsa_table.csv"
ZIP_HRR_OUT_FILENAME = "zip_hrr_table.csv"
ZIP_FIPS_OUT_FILENAME = "zip_fips_table.csv"
Expand Down Expand Up @@ -475,6 +478,176 @@ def derive_zip_hhs_crosswalk():
zip_state.sort_values(["zip", "hhs"]).to_csv(join(OUTPUT_DIR, ZIP_HHS_FILENAME), index=False)


def derive_fips_chngfips_crosswalk():
"""Build a crosswalk table for FIPS to CHNG FIPS."""
if not isfile(join(OUTPUT_DIR, FIPS_STATE_OUT_FILENAME)):
derive_fips_state_crosswalk()

assign_county_groups()
county_groups = pd.read_csv(CHNG_COUNTY_GROUPS_FILE, dtype="string", index_col=False)
# Split list of county FIPS codes into separate columns.
county_groups = pd.concat(
[county_groups, county_groups.fips_list.str.split("|", expand=True)],
axis=1
).drop(
columns = "fips_list"
)

# Change to long format.
county_groups = pd.melt(
county_groups,
id_vars = ["state_fips", "group"],
var_name = "county_num",
value_name = "fips"
).drop(
columns="county_num"
).dropna()

county_groups["state_fips"] = county_groups["state_fips"].str.zfill(2)
county_groups["group"] = county_groups["group"].str.zfill(2)
county_groups["fips"] = county_groups["fips"].str.zfill(5).astype("string")
# Combine state codes and group ids into a single FIPS code.
county_groups["chng-fips"] = county_groups["state_fips"] + "g" + county_groups["group"]

county_groups = county_groups[["fips", "chng-fips"]]
fips_to_state = pd.read_csv(join(OUTPUT_DIR, FIPS_STATE_OUT_FILENAME), dtype="string", index_col=False)

# Get all the fips that aren't included in the chng groupings.
extra_fips_list = list(set(fips_to_state.fips) - set(county_groups.fips))
# Normal fips codes and CHNG fips codes are the same for ungrouped counties.
extra_fips_df = pd.DataFrame({"fips" : extra_fips_list, "chng-fips" : extra_fips_list}, dtype="string")

# Combine grouped and ungrouped counties.
pd.concat(
[county_groups, extra_fips_df]
).sort_values(
["fips", "chng-fips"]
).to_csv(
join(OUTPUT_DIR, FIPS_CHNGFIPS_OUT_FILENAME), index=False
)


def derive_chngfips_state_crosswalk():
"""Build a crosswalk table for FIPS to CHNG FIPS."""
if not isfile(join(OUTPUT_DIR, FIPS_STATE_OUT_FILENAME)):
derive_fips_state_crosswalk()

if not isfile(join(OUTPUT_DIR, FIPS_CHNGFIPS_OUT_FILENAME)):
derive_fips_chngfips_crosswalk()

fips_to_group = pd.read_csv(join(OUTPUT_DIR, FIPS_CHNGFIPS_OUT_FILENAME), dtype="string", index_col=False)
fips_to_state = pd.read_csv(join(OUTPUT_DIR, FIPS_STATE_OUT_FILENAME), dtype="string", index_col=False)

group_to_state = fips_to_group.join(
fips_to_state.set_index("fips"), on="fips", how="left"
).drop(
columns = "fips"
).drop_duplicates(
).sort_values(
["chng-fips", "state_code"]
)
group_to_state.to_csv(join(OUTPUT_DIR, CHNGFIPS_STATE_OUT_FILENAME), index=False)


def fetch_county_groups_spreadsheet():
# County mapping file is derived from
# https://docs.google.com/spreadsheets/d/1PEce4CjjHbRM1Z5xEMNI6Xsq_b2kkCh0/edit#gid=871427657
sheet_id = "1PEce4CjjHbRM1Z5xEMNI6Xsq_b2kkCh0"
sheet_name = "groupings"
# Request sheet in CSV format via tag in URL.
url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"

county_groups = pd.read_csv(
url, dtype="string", index_col=False
).dropna(
how="all", axis=1
)
county_groups["state FIPS"] = county_groups["state FIPS"].astype(int)

# Counties belonging to each group are listed (as FIPS codes) in the "county
# FIPS grouping" column, concatenated and separated by the pipe "|".
new_names = {
"state FIPS": "state_fips",
"county FIPS grouping": "fips_list"
}

county_groups = county_groups.rename(
columns=new_names
)[new_names.values()]

return county_groups


def assign_county_groups():
county_groups = fetch_county_groups_spreadsheet()

# If a county groups mapping file already exists in `data_proc/geomap`, we
# have to be careful to not reassign a group number to a different group.
# Group numbers must remain fixed, even if a given county group is no longer
# being used.
if isfile(CHNG_COUNTY_GROUPS_FILE):
old_county_groups = pd.read_csv(CHNG_COUNTY_GROUPS_FILE, dtype="string", index_col=False)
old_county_groups.group = old_county_groups.group.astype(int)
old_county_groups.state_fips = old_county_groups.state_fips.astype(int)

# Remove rows from county_groups if that `fips_list` value already
# exists in old_county_groups.
county_groups = county_groups[
~county_groups.fips_list.isin(old_county_groups.fips_list)
]

# If grouping file has no new rows, no need to process again.
if county_groups.empty:
return
# Grouping spreadsheet contains rows not seen in old, on-disk county
# groupings file. Combining the two is delicate. While the code below
# appears to work, it has not been formally tested and could be
# invalid for even small changes to the format of the input county
# groupings file.
else:
raise NotImplementedError(
"Can't combine old and new county groupings automatically, "
"code below is not tested or robust to changes in input format."
"We recommend manually working with the code below and the new"
"data in a REPL."
)

# Assign an incrementing integer to be the group id of each remaining
# county grouping within a state using the given sort order.
county_groups["group"] = county_groups.groupby("state_fips").cumcount() + 1

# Find max group number by state in old_county_groups, join on, and
# add max group number to group number.
max_group_by_state = old_county_groups.groupby(
"state_fips"
).group.max(
).reset_index(
).rename(
columns = {"group": "max_group"}
)
county_groups = county_groups.join(
max_group_by_state.set_index("state_fips"),
how="left",
on="state_fips"
).assign(
group = lambda x: x.group + x.max_group
).drop(
["max_group"], axis=1
)

# Combine old_county_groups and county_groups
county_groups = pd.concat([old_county_groups, county_groups])
else:
# Group numbers are 1-indexed.
county_groups["group"] = county_groups.groupby("state_fips").cumcount() + 1

county_groups.sort_values(
["state_fips"], kind="stable"
).to_csv(
CHNG_COUNTY_GROUPS_FILE, index=False
)


def clear_dir(dir_path: str):
for fname in listdir(dir_path):
remove(join(dir_path, fname))
Expand All @@ -501,3 +674,5 @@ def clear_dir(dir_path: str):
derive_zip_population_table()
derive_fips_hhs_crosswalk()
derive_zip_hhs_crosswalk()
derive_fips_chngfips_crosswalk()
derive_chngfips_state_crosswalk()
Loading