nyc_dog_license <- read_csv(
'https://bcdanl.github.io/data/nyc_dog_license.csv')Homework 3
ggplot Visualization; Maps; Quarto Blogging
Direction
Please submit your Quarto Document for Part 1 in Homework 3 to Brightspace with the name below:
danl-310-hw3-LASTNAME-FIRSTNAME.qmd
( e.g.,danl-310-hw3-choe-byeonghak.qmd)
The due is April 5, 2025, 5:00 P.M.
Please send Byeong-Hak an email (
bchoe@geneseo.edu) if you have any questions.
Part 1. Map visualization
Question 1
The following data set is for Question 1:
nyc_zips_coord <- read_csv(
'https://bcdanl.github.io/data/nyc_zips_coord.csv')nyc_zips_df <- read_csv(
'https://bcdanl.github.io/data/nyc_zips_df.csv')Q1a
Replicate the following ggplot.
- You should calculate the proportion of
Pit Bull (or Mix)for each zip code. - You should join data.frames properly.
- Choose the color palette from the
viridisscales - Use
coord_map(projection = "albers", lat0 = 39, lat1 = 45). - To insert the image, use the following
annotate():
- You should calculate the proportion of
# install.packages("ggtext")
library(ggtext)
annotate("richtext",
x = ,
y = ,
label = "<img src='https://bcdanl.github.io/lec_figs/pitbull.png' width='750'/>",
fill = NA,
color = NA)
- Note that the size of ggplot figure is 6.18 (width) x 6.84 (height)
```{.r}
#| fig-width: 6.18
#| fig-height: 6.84
# YOUR CODE IS HERE
```Click to Check the Data Prepping!
nyc_zips_df <- nyc_zips_df |>
left_join(nyc_zips_coord)
nyc_dogs <- nyc_dog_license |>
group_by(breed_rc) |>
summarise(N = n()) |>
filter(dense_rank(-N)<=5)
nyc_fb <- nyc_dog_license |>
group_by(zip_code, breed_rc) |>
count() |>
group_by(zip_code) |>
mutate(pct = round((n / sum(n))*100, 2)) |>
filter(breed_rc %in% c('Pit Bull (or Mix)'))
fb_map <- nyc_zips_df |>
left_join( nyc_fb )Click to Check the ggplot!
fb_map |>
ggplot(mapping = aes(x = X, y = Y,
fill = pct,
group = zip_code)) +
annotate("richtext",
x = quantile(fb_map$X, .075, na.rm = T),
y = quantile(fb_map$Y, .60, na.rm = T),
label = "<img src='https://bcdanl.github.io/lec_figs/pitbull.png' width='750'/>",
fill = NA,
color = NA) +
geom_polygon(color = "gray80",
size = 0.1) +
scale_fill_viridis_c(option = "inferno",
breaks = seq(0, 25, 2.5)) +
labs(fill = "Percent of All Licensed Pit Bull",
title = "New York City's Pit Bull",
subtitle = "By Zip Code. Based on Licensing Data") +
theme_map() +
theme(legend.justification = c(.5,.5),
legend.position = 'top',
legend.direction = "horizontal",
plot.title = element_text(hjust = .5,
vjust = .5,
face = 'bold',
size = rel(2.5)),
plot.subtitle = element_text(hjust = .5,
vjust = .5,
size = rel(1.25))) +
coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
guides(fill = guide_legend(title.position = "top",
label.position = "bottom",
keywidth = 2, nrow = 1))Q1b
- Which
zip_codehas the highest proportion ofPit Bull (or Mix)?
Click to Check the Answer!
q1b <- fb_map |>
select(zip_code, po_name, borough, pct) |>
distinct() |>
filter(dense_rank(-pct) == 1)
# for displaying data.frame:
datatable(q1b)Click to Check Another One!
# https://r4ds.hadley.nz/data-transform.html#the-slice_-functions
q1b <- fb_map |>
select(zip_code, po_name, borough, pct) |>
distinct() |>
slice_max(pct, n = 1) Question 2
The following data is for Question 2:
election_panel <- read_csv(
'https://bcdanl.github.io/data/election_panel.csv')- Replicate the following map.
- Do not use
coord_map(projection = "albers", lat0 = 39, lat1 = 45).
- Do not use

Click to Check Data Prepping!
county_map <- socviz::county_map
county_map <- county_map |>
mutate(id = as.integer(id))
election_panel <- election_panel |>
mutate(id = as.integer(id))
county_full <- county_map |>
left_join(election_panel,
by = "id")
county_full <- county_full |>
arrange(year, county_fips, order)
# Function to create na_map for each year
na_map <- function(yr){
county_full_na <- county_full |>
filter(is.na(year)) |> # Part of Alaska
select(-year) |>
mutate( year = yr)
}
county_full_NAmap <- county_full
# Row-binding na_map(yr) to county_full_NAmap for each year
for (yr in as.numeric( levels( factor( county_full$year ) ) ) ){
county_full_NAmap <- county_full_NAmap |>
rbind( na_map(yr) )
}Click to Check the ggplot!
# Also, try it with
# (1) data = county_full
# (2) data = county_full |> filter(!is.na(year))
# (3) data = county_full_NAmap
p1 <- ggplot(data = county_full_NAmap |> filter(!is.na(year)),
mapping = aes(x = long, y = lat, group = group,
fill = pct_DEMOCRAT )) +
geom_polygon(color = "grey60",
linewidth = 0.025)
q <- quantile(county_full$pct_DEMOCRAT,
probs = c(0, 0.25, 0.5, 0.75, 1),
na.rm = TRUE)
p2 <- p1 +
scale_fill_gradient(
low = '#FFFFFF', # transparent white
high = '#2E74C0', # from party_colors for DEM
na.value = "grey80",
# midpoint = quantile(county_full$pct_DEMOCRAT, .5, na.rm = T),
breaks = q,
labels = c(paste0(round(q[1], 1), "\n(Min)"),
paste0(round(q[2], 1), "\n(25th)"),
paste0(round(q[3], 1), "\n(Median)"),
paste0(round(q[4], 1), "\n(75th)"),
paste0(round(q[5], 1), "\n(Max)")
),
guide = "colourbar"
)
p2 + labs(fill = "Percent\nDemocrat",
title = "U.S. Presidential Election, 2000-2020") +
theme_map() +
facet_wrap(.~ year, ncol = 2) +
theme(plot.margin = unit( c(1, 1, 4, 0.5), "cm"),
plot.title = element_text(size = rel(2),
hjust = .5),
legend.position = c(0.5, -.15),
legend.justification = c(.5,.5),
strip.background = element_rect(fill = "#e1ecf8",
colour = "black", size = .1)
) +
guides(fill = guide_colourbar(direction = "horizontal", barwidth = 20,
title.hjust = -1, title.vjust = 1))Part 2. Quarto Blogging
- Use the following set of data.frames for Quarto Blogging:
nyc_dog_license <- read_csv(
'https://bcdanl.github.io/data/nyc_dog_license.csv')nyc_zips_coord <- read_csv(
'https://bcdanl.github.io/data/nyc_zips_coord.csv')nyc_zips_df <- read_csv(
'https://bcdanl.github.io/data/nyc_zips_df.csv')