Homework 3

ggplot Visualization; Maps; Quarto Blogging

Author

Byeong-Hak Choe

Published

April 23, 2025

Modified

April 23, 2025

Direction

Please submit your Quarto Document for Part 1 in Homework 3 to Brightspace with the name below:
- danl-310-hw3-LASTNAME-FIRSTNAME.qmd
  ( e.g., danl-310-hw3-choe-byeonghak.qmd )
The due is April 5, 2025, 5:00 P.M.
Please send Byeong-Hak an email (bchoe@geneseo.edu) if you have any questions.

Part 1. Map visualization

Question 1

The following data set is for Question 1:

nyc_dog_license <- read_csv(
  'https://bcdanl.github.io/data/nyc_dog_license.csv')

nyc_zips_coord <- read_csv(
  'https://bcdanl.github.io/data/nyc_zips_coord.csv')

nyc_zips_df <- read_csv(
  'https://bcdanl.github.io/data/nyc_zips_df.csv')

Q1a

Replicate the following ggplot.
- You should calculate the proportion of Pit Bull (or Mix) for each zip code.
- You should join data.frames properly.
- Choose the color palette from the viridis scales
  - https://ggplot2.tidyverse.org/reference/scale_viridis.html.
- Use coord_map(projection = "albers", lat0 = 39, lat1 = 45).
- To insert the image, use the following annotate():

# install.packages("ggtext")
library(ggtext)

annotate("richtext", 
         x =  , 
         y =  , 
         label = "<img src='https://bcdanl.github.io/lec_figs/pitbull.png' width='750'/>", 
         fill = NA,
         color = NA)

Note that the size of ggplot figure is 6.18 (width) x 6.84 (height)

```{.r}
#| fig-width: 6.18
#| fig-height: 6.84

# YOUR CODE IS HERE
```

Click to Check the Data Prepping!

nyc_zips_df <- nyc_zips_df |> 
  left_join(nyc_zips_coord)

nyc_dogs <- nyc_dog_license |>
  group_by(breed_rc) |> 
  summarise(N = n()) |> 
  filter(dense_rank(-N)<=5)

nyc_fb <- nyc_dog_license |>
  group_by(zip_code, breed_rc) |>
  count() |>
  group_by(zip_code) |> 
  mutate(pct = round((n / sum(n))*100, 2)) |>
  filter(breed_rc %in% c('Pit Bull (or Mix)'))


fb_map <- nyc_zips_df |> 
  left_join( nyc_fb )

Click to Check the ggplot!

fb_map |> 
  ggplot(mapping = aes(x = X, y = Y, 
                       fill = pct,
                       group = zip_code)) +
  annotate("richtext", 
           x = quantile(fb_map$X, .075, na.rm = T), 
           y = quantile(fb_map$Y, .60, na.rm = T), 
           label = "<img src='https://bcdanl.github.io/lec_figs/pitbull.png' width='750'/>", 
           fill = NA,
           color = NA) +
  geom_polygon(color = "gray80", 
               size = 0.1) +
  scale_fill_viridis_c(option = "inferno",
                       breaks = seq(0, 25, 2.5)) +
  labs(fill = "Percent of All Licensed Pit Bull",
       title = "New York City's Pit Bull",
       subtitle = "By Zip Code. Based on Licensing Data") +
  theme_map() +
  theme(legend.justification = c(.5,.5),
        legend.position = 'top',
        legend.direction = "horizontal",
        plot.title = element_text(hjust = .5,
                                  vjust = .5,
                                  face = 'bold',
                                  size = rel(2.5)),
        plot.subtitle = element_text(hjust = .5,
                                     vjust = .5,
                                     size = rel(1.25))) +
  coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
  guides(fill = guide_legend(title.position = "top",
                             label.position = "bottom",
                             keywidth = 2, nrow = 1))

Q1b

Which zip_code has the highest proportion of Pit Bull (or Mix)?

Click to Check the Answer!

q1b <- fb_map |> 
  select(zip_code, po_name, borough, pct) |> 
  distinct() |> 
  filter(dense_rank(-pct) == 1)

# for displaying data.frame:
datatable(q1b)

Click to Check Another One!

# https://r4ds.hadley.nz/data-transform.html#the-slice_-functions
q1b <- fb_map |> 
  select(zip_code, po_name, borough, pct) |> 
  distinct() |> 
  slice_max(pct, n = 1)

Question 2

The following data is for Question 2:

election_panel <- read_csv(
  'https://bcdanl.github.io/data/election_panel.csv')

Replicate the following map.
- Do not use coord_map(projection = "albers", lat0 = 39, lat1 = 45).

Click to Check Data Prepping!

county_map <- socviz::county_map
county_map <- county_map |> 
  mutate(id = as.integer(id))

election_panel <- election_panel |> 
  mutate(id = as.integer(id))

county_full <- county_map |> 
  left_join(election_panel, 
            by = "id")

county_full <- county_full |> 
  arrange(year, county_fips, order)

# Function to create na_map for each year
na_map <- function(yr){ 
  county_full_na <- county_full |> 
    filter(is.na(year)) |>  # Part of Alaska
    select(-year) |> 
    mutate( year = yr)
}

county_full_NAmap <- county_full

# Row-binding na_map(yr) to county_full_NAmap for each year
for (yr in as.numeric( levels( factor( county_full$year ) ) ) ){
  county_full_NAmap <- county_full_NAmap |> 
    rbind( na_map(yr) )
}

Click to Check the ggplot!

# Also, try it with 
# (1) data = county_full
# (2) data = county_full |> filter(!is.na(year))
# (3) data = county_full_NAmap

p1 <- ggplot(data = county_full_NAmap |> filter(!is.na(year)),
             mapping = aes(x = long, y = lat, group = group, 
                           fill = pct_DEMOCRAT )) + 
  geom_polygon(color = "grey60", 
               linewidth = 0.025) 

q <- quantile(county_full$pct_DEMOCRAT, 
              probs = c(0, 0.25, 0.5, 0.75, 1), 
              na.rm = TRUE)

p2 <- p1 + 
  scale_fill_gradient( 
  low = '#FFFFFF',  # transparent white
  high = '#2E74C0',  # from party_colors for DEM
  na.value = "grey80",
  # midpoint = quantile(county_full$pct_DEMOCRAT, .5, na.rm = T),
  breaks = q,
  labels = c(paste0(round(q[1], 1), "\n(Min)"),
             paste0(round(q[2], 1), "\n(25th)"),
             paste0(round(q[3], 1), "\n(Median)"),
             paste0(round(q[4], 1), "\n(75th)"),
             paste0(round(q[5], 1), "\n(Max)")
             ),
  guide = "colourbar"
  ) 

p2 + labs(fill = "Percent\nDemocrat",
          title = "U.S. Presidential Election, 2000-2020") +
  theme_map() + 
  facet_wrap(.~ year, ncol = 2) +
  theme(plot.margin = unit( c(1, 1, 4, 0.5), "cm"),
        plot.title = element_text(size = rel(2),
                                  hjust = .5),
        legend.position = c(0.5, -.15),
        legend.justification = c(.5,.5),
        strip.background = element_rect(fill = "#e1ecf8", 
                                        colour = "black", size = .1)
        ) +
  guides(fill = guide_colourbar(direction = "horizontal", barwidth = 20,
                                title.hjust = -1, title.vjust = 1))

Part 2. Quarto Blogging

Use the following set of data.frames for Quarto Blogging:

nyc_dog_license <- read_csv(
  'https://bcdanl.github.io/data/nyc_dog_license.csv')

nyc_zips_coord <- read_csv(
  'https://bcdanl.github.io/data/nyc_zips_coord.csv')

nyc_zips_df <- read_csv(
  'https://bcdanl.github.io/data/nyc_zips_df.csv')