<- read_csv(
nyc_dog_license 'https://bcdanl.github.io/data/nyc_dog_license.csv')
Homework 3
ggplot
Visualization; Maps; Quarto Blogging
Direction
Please submit your Quarto Document for Part 1 in Homework 3 to Brightspace with the name below:
danl-310-hw3-LASTNAME-FIRSTNAME.qmd
( e.g.,danl-310-hw3-choe-byeonghak.qmd
)
The due is April 5, 2025, 5:00 P.M.
Please send Byeong-Hak an email (
bchoe@geneseo.edu
) if you have any questions.
Part 1. Map visualization
Question 1
The following data set is for Question 1:
<- read_csv(
nyc_zips_coord 'https://bcdanl.github.io/data/nyc_zips_coord.csv')
<- read_csv(
nyc_zips_df 'https://bcdanl.github.io/data/nyc_zips_df.csv')
Q1a
Replicate the following ggplot.
- You should calculate the proportion of
Pit Bull (or Mix)
for each zip code. - You should join data.frames properly.
- Choose the color palette from the
viridis
scales - Use
coord_map(projection = "albers", lat0 = 39, lat1 = 45)
. - To insert the image, use the following
annotate()
:
- You should calculate the proportion of
# install.packages("ggtext")
library(ggtext)
annotate("richtext",
x = ,
y = ,
label = "<img src='https://bcdanl.github.io/lec_figs/pitbull.png' width='750'/>",
fill = NA,
color = NA)
- Note that the size of ggplot figure is 6.18 (width) x 6.84 (height)
```{.r}
#| fig-width: 6.18
#| fig-height: 6.84
# YOUR CODE IS HERE
```
Click to Check the Data Prepping!
<- nyc_zips_df |>
nyc_zips_df left_join(nyc_zips_coord)
<- nyc_dog_license |>
nyc_dogs group_by(breed_rc) |>
summarise(N = n()) |>
filter(dense_rank(-N)<=5)
<- nyc_dog_license |>
nyc_fb group_by(zip_code, breed_rc) |>
count() |>
group_by(zip_code) |>
mutate(pct = round((n / sum(n))*100, 2)) |>
filter(breed_rc %in% c('Pit Bull (or Mix)'))
<- nyc_zips_df |>
fb_map left_join( nyc_fb )
Click to Check the ggplot!
|>
fb_map ggplot(mapping = aes(x = X, y = Y,
fill = pct,
group = zip_code)) +
annotate("richtext",
x = quantile(fb_map$X, .075, na.rm = T),
y = quantile(fb_map$Y, .60, na.rm = T),
label = "<img src='https://bcdanl.github.io/lec_figs/pitbull.png' width='750'/>",
fill = NA,
color = NA) +
geom_polygon(color = "gray80",
size = 0.1) +
scale_fill_viridis_c(option = "inferno",
breaks = seq(0, 25, 2.5)) +
labs(fill = "Percent of All Licensed Pit Bull",
title = "New York City's Pit Bull",
subtitle = "By Zip Code. Based on Licensing Data") +
theme_map() +
theme(legend.justification = c(.5,.5),
legend.position = 'top',
legend.direction = "horizontal",
plot.title = element_text(hjust = .5,
vjust = .5,
face = 'bold',
size = rel(2.5)),
plot.subtitle = element_text(hjust = .5,
vjust = .5,
size = rel(1.25))) +
coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
guides(fill = guide_legend(title.position = "top",
label.position = "bottom",
keywidth = 2, nrow = 1))
Q1b
- Which
zip_code
has the highest proportion ofPit Bull (or Mix)
?
Click to Check the Answer!
<- fb_map |>
q1b select(zip_code, po_name, borough, pct) |>
distinct() |>
filter(dense_rank(-pct) == 1)
# for displaying data.frame:
datatable(q1b)
Click to Check Another One!
# https://r4ds.hadley.nz/data-transform.html#the-slice_-functions
<- fb_map |>
q1b select(zip_code, po_name, borough, pct) |>
distinct() |>
slice_max(pct, n = 1)
Question 2
The following data is for Question 2:
<- read_csv(
election_panel 'https://bcdanl.github.io/data/election_panel.csv')
- Replicate the following map.
- Do not use
coord_map(projection = "albers", lat0 = 39, lat1 = 45)
.
- Do not use
Click to Check Data Prepping!
<- socviz::county_map
county_map <- county_map |>
county_map mutate(id = as.integer(id))
<- election_panel |>
election_panel mutate(id = as.integer(id))
<- county_map |>
county_full left_join(election_panel,
by = "id")
<- county_full |>
county_full arrange(year, county_fips, order)
# Function to create na_map for each year
<- function(yr){
na_map <- county_full |>
county_full_na filter(is.na(year)) |> # Part of Alaska
select(-year) |>
mutate( year = yr)
}
<- county_full
county_full_NAmap
# Row-binding na_map(yr) to county_full_NAmap for each year
for (yr in as.numeric( levels( factor( county_full$year ) ) ) ){
<- county_full_NAmap |>
county_full_NAmap rbind( na_map(yr) )
}
Click to Check the ggplot!
# Also, try it with
# (1) data = county_full
# (2) data = county_full |> filter(!is.na(year))
# (3) data = county_full_NAmap
<- ggplot(data = county_full_NAmap |> filter(!is.na(year)),
p1 mapping = aes(x = long, y = lat, group = group,
fill = pct_DEMOCRAT )) +
geom_polygon(color = "grey60",
linewidth = 0.025)
<- quantile(county_full$pct_DEMOCRAT,
q probs = c(0, 0.25, 0.5, 0.75, 1),
na.rm = TRUE)
<- p1 +
p2 scale_fill_gradient(
low = '#FFFFFF', # transparent white
high = '#2E74C0', # from party_colors for DEM
na.value = "grey80",
# midpoint = quantile(county_full$pct_DEMOCRAT, .5, na.rm = T),
breaks = q,
labels = c(paste0(round(q[1], 1), "\n(Min)"),
paste0(round(q[2], 1), "\n(25th)"),
paste0(round(q[3], 1), "\n(Median)"),
paste0(round(q[4], 1), "\n(75th)"),
paste0(round(q[5], 1), "\n(Max)")
),guide = "colourbar"
)
+ labs(fill = "Percent\nDemocrat",
p2 title = "U.S. Presidential Election, 2000-2020") +
theme_map() +
facet_wrap(.~ year, ncol = 2) +
theme(plot.margin = unit( c(1, 1, 4, 0.5), "cm"),
plot.title = element_text(size = rel(2),
hjust = .5),
legend.position = c(0.5, -.15),
legend.justification = c(.5,.5),
strip.background = element_rect(fill = "#e1ecf8",
colour = "black", size = .1)
+
) guides(fill = guide_colourbar(direction = "horizontal", barwidth = 20,
title.hjust = -1, title.vjust = 1))
Part 2. Quarto Blogging
- Use the following set of data.frames for Quarto Blogging:
<- read_csv(
nyc_dog_license 'https://bcdanl.github.io/data/nyc_dog_license.csv')
<- read_csv(
nyc_zips_coord 'https://bcdanl.github.io/data/nyc_zips_coord.csv')
<- read_csv(
nyc_zips_df 'https://bcdanl.github.io/data/nyc_zips_df.csv')