library(tidyverse)
library(nycflights13)
<- flights
flights
::datatable(flights |> head(100)) DT
Classwork 7
Data Transformation Practice
Consider the flights
data.frame from the nycflights13
package.
Question 1
- Look at the number of cancelled flights per day. Is there a pattern? Is the proportion of cancelled flights related to the average delay?
- Assume that a flight is cancelled if either
dep_delay
,arr_delay
, or both are missing.
- Assume that a flight is cancelled if either
Click to Check the Answer!
<- flights |>
q1 mutate(is_cancelled = is.na(dep_delay) | is.na(arr_delay),
date = make_date(year,month,day)) |>
group_by(date, is_cancelled) |>
summarise(n = n(),
dep_delay = mean(dep_delay, na.rm = T),
arr_delay = mean(arr_delay, na.rm = T)) |>
mutate(prop = n / sum(n))
# filter(is_cancelled == T) |>
# arrange(-prop)
<- q1 |>
q1_1 filter(is_cancelled == T) |>
arrange(-prop)
Click to Check the Answer!
|>
q1 filter(is_cancelled == T) |>
ggplot(aes(x = date, y = prop)) +
geom_col() +
theme(axis.text.x = element_text(angle = 90))
Click to Check the Answer!
|>
q1 ggplot(aes(x = dep_delay, fill = is_cancelled)) +
geom_density(alpha = .4)
Click to Check the Answer!
|>
q1 ggplot(aes(x = arr_delay, fill = is_cancelled)) +
geom_density(alpha = .4)
Click to Check the Answer!
|>
q1_1 filter(prop < .2) |>
ggplot(aes(x = prop, y = dep_delay)) +
geom_point(alpha = .3) +
geom_smooth() +
geom_smooth(method = lm, color = 'darkorange')
Question 2
- Which
carrier
has the worst arrival delays? - Calculate the proportion of flights with an arrival delay greater than 15 minutes for each carrier and for each origin.
- Can you disentangle the effects of bad airports vs. bad carriers? Why/why not?
Click to Check the Answer!
<- flights |>
q2_1 group_by(carrier) |>
summarise(arr_delay_max = max(arr_delay, na.rm = T),
n = n()) |>
arrange(-arr_delay_max)
q2_1
# A tibble: 16 × 3
carrier arr_delay_max n
<chr> <dbl> <int>
1 HA 1272 342
2 MQ 1127 26397
3 AA 1007 32729
4 DL 931 48110
5 F9 834 685
6 9E 744 18460
7 VX 676 5162
8 EV 577 54173
9 FL 572 3260
10 B6 497 54635
11 US 492 20536
12 UA 455 58665
13 WN 453 12275
14 YV 381 601
15 AS 198 714
16 OO 157 32
Click to Check the Answer!
<- flights |>
q2_2 group_by(carrier, origin) |>
summarize(long_arr_delay = mean(arr_delay > 15, na.rm = T)) |>
filter(n() == 3)
Click to Check the Answer!
|>
q2_2 ggplot(aes(x = carrier, y = long_arr_delay, fill = carrier)) +
geom_col(show.legend = F) +
facet_wrap(~origin, ncol = 1) +
scale_fill_viridis_d()
Click to Check the Answer!
|>
q2_2 ggplot(aes(y = origin, x = long_arr_delay, fill = origin)) +
geom_col(show.legend = F) +
facet_wrap(~carrier, nrow = 1) +
scale_fill_viridis_d()
Click to Check the Answer!
<- lm(arr_delay ~ origin + carrier, data = flights)
m <- lm(arr_delay ~ origin * carrier, data = flights)
m_int
summary(m)
Call:
lm(formula = arr_delay ~ origin + carrier, data = flights)
Residuals:
Min 1Q Median 3Q Max
-89.38 -23.77 -11.34 7.35 1278.92
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 9.4813 0.4054 23.390 < 2e-16 ***
originJFK -2.3170 0.2577 -8.991 < 2e-16 ***
originLGA -1.9093 0.2371 -8.052 8.15e-16 ***
carrierAA -7.2351 0.4256 -16.999 < 2e-16 ***
carrierAS -19.4122 1.7121 -11.338 < 2e-16 ***
carrierB6 1.9717 0.3871 5.093 3.52e-07 ***
carrierDL -5.9238 0.4011 -14.769 < 2e-16 ***
carrierEV 6.6826 0.4356 15.342 < 2e-16 ***
carrierF9 14.3488 1.7401 8.246 < 2e-16 ***
carrierFL 12.5440 0.8747 14.341 < 2e-16 ***
carrierHA -14.0794 2.4190 -5.820 5.87e-09 ***
carrierMQ 3.1542 0.4528 6.966 3.26e-12 ***
carrierOO 3.9641 8.2334 0.481 0.630
carrierUA -5.4858 0.4262 -12.873 < 2e-16 ***
carrierUS -5.7979 0.4801 -12.076 < 2e-16 ***
carrierVX -6.1027 0.7068 -8.634 < 2e-16 ***
carrierWN 1.1171 0.5520 2.024 0.043 *
carrierYV 7.9851 1.9374 4.122 3.76e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 44.29 on 327328 degrees of freedom
(9430 observations deleted due to missingness)
Multiple R-squared: 0.0153, Adjusted R-squared: 0.01525
F-statistic: 299.2 on 17 and 327328 DF, p-value: < 2.2e-16
Click to Check the Answer!
summary(m_int)
Call:
lm(formula = arr_delay ~ origin * carrier, data = flights)
Residuals:
Min 1Q Median 3Q Max
-85.32 -23.89 -11.48 7.38 1278.92
Coefficients: (13 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.6153 1.2809 1.261 0.207315
originJFK 7.2281 1.3354 5.413 6.21e-08 ***
originLGA 0.1533 1.5718 0.098 0.922310
carrierAA -0.6376 1.4909 -0.428 0.668927
carrierAS -11.5461 2.0980 -5.503 3.73e-08 ***
carrierB6 7.7733 1.3940 5.576 2.46e-08 ***
carrierDL 7.1652 1.4480 4.948 7.48e-07 ***
carrierEV 15.4074 1.2992 11.859 < 2e-16 ***
carrierF9 20.1522 1.9246 10.471 < 2e-16 ***
carrierFL 18.3474 1.2026 15.256 < 2e-16 ***
carrierHA -15.7585 2.4220 -6.506 7.71e-11 ***
carrierMQ 14.6918 1.6045 9.157 < 2e-16 ***
carrierOO 19.8847 18.1078 1.098 0.272147
carrierUA 1.8599 1.2976 1.433 0.151767
carrierUS -0.6381 1.4468 -0.441 0.659169
carrierVX -2.2924 1.7036 -1.346 0.178406
carrierWN 9.4480 1.4014 6.742 1.57e-11 ***
carrierYV 13.7884 2.1043 6.552 5.67e-11 ***
originJFK:carrierAA -6.1245 1.5841 -3.866 0.000111 ***
originLGA:carrierAA -2.4627 1.7842 -1.380 0.167491
originJFK:carrierAS NA NA NA NA
originLGA:carrierAS NA NA NA NA
originJFK:carrierB6 -7.7230 1.4604 -5.288 1.24e-07 ***
originLGA:carrierB6 3.9695 1.7619 2.253 0.024260 *
originJFK:carrierDL -18.3878 1.5278 -12.035 < 2e-16 ***
originLGA:carrierDL -5.0060 1.7356 -2.884 0.003923 **
originJFK:carrierEV -6.4619 1.8184 -3.554 0.000380 ***
originLGA:carrierEV -7.8960 1.6600 -4.757 1.97e-06 ***
originJFK:carrierF9 NA NA NA NA
originLGA:carrierF9 NA NA NA NA
originJFK:carrierFL NA NA NA NA
originLGA:carrierFL NA NA NA NA
originJFK:carrierHA NA NA NA NA
originLGA:carrierHA NA NA NA NA
originJFK:carrierMQ -11.0665 1.7329 -6.386 1.70e-10 ***
originLGA:carrierMQ -7.1255 1.8777 -3.795 0.000148 ***
originJFK:carrierOO NA NA NA NA
originLGA:carrierOO -12.2185 20.3428 -0.601 0.548087
originJFK:carrierUA -8.1928 1.5045 -5.446 5.17e-08 ***
originLGA:carrierUA 1.0137 1.6627 0.610 0.542067
originJFK:carrierUS -6.0912 1.7018 -3.579 0.000345 ***
originLGA:carrierUS 1.4004 1.7548 0.798 0.424835
originJFK:carrierVX -3.7232 1.8957 -1.964 0.049534 *
originLGA:carrierVX NA NA NA NA
originJFK:carrierWN NA NA NA NA
originLGA:carrierWN -2.9976 1.7666 -1.697 0.089727 .
originJFK:carrierYV NA NA NA NA
originLGA:carrierYV NA NA NA NA
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 44.24 on 327311 degrees of freedom
(9430 observations deleted due to missingness)
Multiple R-squared: 0.01748, Adjusted R-squared: 0.01738
F-statistic: 171.3 on 34 and 327311 DF, p-value: < 2.2e-16
Question 3
- Find all destinations that are flown by at least two carriers. Use that information to rank the carriers.
Click to Check the Answer!
<- flights |>
q3_1 group_by(carrier, dest) |>
summarize(n_flights = n()) |>
summarise(n_dests = n()) |>
arrange(-n_dests)
<- flights |>
q3_2 distinct(carrier, dest) |>
count(carrier) |>
arrange(-n)
<- flights |>
q3_3 distinct(carrier, dest) |>
count(dest) |>
arrange(-n) |>
filter(n >= 2)
Question 4
- The following is the data.frame for Question 4.
<- read_csv("https://bcdanl.github.io/data/holiday_movies.csv") holiday_movies
The data.frame holiday_movies comes from the Internet Movie Database (IMDb).
The following is the data.frame, holiday_movies.
Variable description
tconst: alphanumeric unique identifier of the title
title_type: the type/format of the title
- (movie, video, or tvMovie)
primary_title: the more popular title / the title used by the filmmakers on promotional materials at the point of release
simple_title: the title in lowercase, with punctuation removed, for easier filtering and grouping
year: the release year of a title
runtime_minutes: primary runtime of the title, in minutes
average_rating: weighted average of all the individual user ratings on IMDb
num_votes: number of votes the title has received on IMDb (titles with fewer than 10 votes were not included in this dataset)
- The following is another data.frame holiday_movie_genres that is related with the data.frame holiday_movies:
<- read_csv("https://bcdanl.github.io/data/holiday_movie_genres.csv") holiday_movie_genres
- The data.frame holiday_movie_genres include up to three genres associated with the titles that appear in the data.frame.
Variable description
- tconst: alphanumeric unique identifier of the title
- genres: genres associated with the title, one row per genre
Q4a.
- Provide the R code to generate the data.frame, holiday_movie_with_genres, which combines the two data.frames, holiday_movies and holiday_movie_genres:
Click to Check the Answer!
<- holiday_movie_genres |>
holiday_movies_with_genres left_join(holiday_movies)
- The following shows the first four variables in holiday_movie_with_genres:
Q4b.
Provide the R code using
skimr::skim()
to see how the summary statistics—mean, median, standard deviation, minimum, maximum, first and third quartiles—of average_rating and num_votes varies by popular genres and title_type.- Consider only the five popular genres, which are selected in terms of the number of titles for each genre.
- Removes the video type of the titles when calculating the summary statistics.
Click to Check the Answer!
<- holiday_movies_with_genres |>
popular_genres group_by(genres) |>
count() |>
ungroup() |>
slice_max(n, n = 5)
|>
holiday_movies_with_genres filter(genres %in% popular_genres$genres,
!= 'video') |>
title_type group_by(genres, title_type) |>
::skim(average_rating, num_votes) skimr
Q4c.
- Provide R code to recreate the ggplot figure illustrating how the relationship between log10(num_votes) and average_rating varies by the popular genres and title_type.
- The five popular genres are selected in terms of the number of titles for each genre.
- The video type of the titles are removed in the ggplot figure.
Click to Check the Answer!
<- holiday_movies_with_genres |>
popular_genres group_by(genres) |>
count() |>
ungroup() |>
slice_max(n, n = 5)
|>
holiday_movies_with_genres filter(genres %in% popular_genres$genres,
!= 'video') |>
title_type group_by(genres) |>
mutate(mean_rating = mean(average_rating, na.rm = T)) |>
ggplot(aes(y = average_rating, x = log10(num_votes))) +
geom_point(alpha = .2) +
geom_smooth(aes(color = genres),
method = lm) +
# coord_cartesian(ylim = c(5,7)) +
facet_grid(title_type~ genres, scales = "free")
Q4d.
- Provide a comment to illustrate how the relationship between log10(num_votes) and average_rating varies by the popular genres and title_type.
Q4e.
Provide R code to recreate the ggplot figure illustrating the annual trend of the share of number of movies by popular genre from year 1975 to 2022.
- For genres that are not popular, categorize them as “Other”.
- Consider reordering the categories in genres in descending order based on their share in the year 2022.
- Use “Set2” color palette from the
RColorBrewer
package.
- For genres that are not popular, categorize them as “Other”.
Click to Check the Answer!
|>
holiday_movies_with_genres mutate(genres = ifelse( !(genres %in% popular_genres$genres), "Other", genres )) |>
group_by(year, genres) |>
count() |>
filter(year >= 1975, year <= 2022) |>
ggplot() +
geom_col(aes(x = year, y = n,
fill = fct_reorder2(genres, year, n)),
position = 'fill',
width = rel(1.25),
color = 'transparent') +
labs(y = "Share of number of movies by genre",
fill = "Genre", x = "",
title = "How Have Christmas Movie Genres Evolved Over Time?") +
scale_fill_brewer(palette = "Set2") +
::scale_y_percent() +
hrbrthemesguides(fill = guide_legend(title.position = "top",
# label.position = "bottom",
keywidth = 3,
keyheight = rel(3.4),
ncol = 1)) +
theme(legend.position = "right",
legend.title =
element_text(face = "bold.italic",
size = rel(1.25),
hjust = .5),
legend.text =
element_text(face = "italic",
size = rel(1.25),
hjust = .5),
legend.box.margin = margin(-20, 0, 0, -20),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank(),
axis.text.y = element_text(margin = margin(0,-20,0,0)),
axis.title.x = element_text(face = "bold",
size = rel(1.5)),
axis.title.y = element_text(face = "bold",
size = rel(1.5)),
plot.title = element_text(
margin = margin(0,0,20,0),
hjust = .33
) )
- c.f.) The following uses
geom_area()
instead:
Click to Check the Answer!
<- holiday_movies_with_genres |>
df mutate(genres = ifelse(!(genres %in% popular_genres$genres), "Other", genres)) |>
group_by(year, genres) |>
count() |>
filter(year >= 1975, year <= 2022) |>
group_by(year) |>
mutate(prop = n / sum(n)) |> # Convert count to proportion
ungroup()
# df2 <- df |>
# group_by(year) |>
# summarise(sum = sum(prop))
|>
df ggplot(aes(x = year, y = prop, fill = fct_reorder2(genres, year, prop))) +
geom_area(position = "fill") + # Stack by genre
labs(y = "Share of number of movies by genre",
fill = "Genre", x = "",
title = "How Have Christmas Movie Genres Evolved Over Time?") +
scale_fill_brewer(palette = "Set2") +
::scale_y_percent() +
hrbrthemesguides(fill = guide_legend(title.position = "top",
# label.position = "bottom",
keywidth = 3,
keyheight = rel(3.4),
ncol = 1)) +
theme(legend.position = "right",
legend.title = element_text(face = "bold.italic",
size = rel(1.25),
hjust = .5),
legend.text = element_text(face = "italic",
size = rel(1.25),
hjust = .5),
legend.box.margin = margin(-20, 0, 0, -20),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank(),
axis.text.y = element_text(margin = margin(0, -20, 0, 0)),
axis.title.x = element_text(face = "bold",
size = rel(1.5)),
axis.title.y = element_text(face = "bold",
size = rel(1.5)),
plot.title = element_text(
margin = margin(0,0,20,0),
hjust = .33
) )
Q4f.
Provide a comment to illustrate the annual trend of (1) the share of number of movies by popular genre from year 1975 to 2022.
- Which genre has become more popular since 2010?
Q4g.
Add the following two variables—christmas and holiday—to the data.frame holiday_movies_with_genres:
christmas:
- TRUE if the simple_title includes “christmas”, “xmas”, “x mas”
- FALSE otherwise
holiday:
- TRUE if the simple_title includes “holiday”
- FALSE otherwise
Click to Check the Answer!
<- holiday_movies_with_genres |>
holiday_movies_with_genres mutate(christmas = ifelse(str_detect(simple_title, "christmas") |
str_detect(simple_title, "xmas") |
str_detect(simple_title, "x mas"),
T, F),holiday = ifelse(str_detect(simple_title, "holiday"),
T, F), )
Q4h.
- Provide R code to recreate the ggplot figure illustrating the annual trend of (1) the number of movie titles with “holiday” varies by christmas.
Click to Check the Answer!
|>
holiday_movies_with_genres group_by(year, christmas, holiday) |>
count() |>
ggplot(aes(x = year, y = n, color = holiday)) +
geom_smooth() +
geom_point(alpha = .33) +
facet_wrap(christmas~., scales = "free")
Q4i.
- Provide R code to recreate the ggplot figure illustrating how the mean value of num_votes varies by the popular genres for the titles with “christmas”.
Click to Check the Answer!
|>
holiday_movies_with_genres filter(genres %in% popular_genres$genres) |>
group_by(genres, christmas) |>
summarise(mean_rating = mean(average_rating),
mean_votes = mean(num_votes)) |>
filter(christmas == T) |>
ggplot(aes(x = mean_votes, y = fct_reorder(genres, mean_votes),
+
)) geom_point(size = 2) +
labs(y = "genres")
Question 5
- The following is the data.frame for Question 5.
<- read_csv("https://bcdanl.github.io/data/tripadvisor_cleaned.csv") tripadvisor
TripAdvisor is an online travel research company that empowers people around the world to plan and enjoy the ideal trip.
TripAdvisor wanted to know whether promoting membership on their platform could drive engagement and bookings.
To do so, TripAdvisor had just run an experiment to explore user retention by offering a random subset of customers an easier sign-up process for membership.
The following is the data.frame, tripadvisor.
Variable description
id: a unique identifier for a user.
time:
- PRE if time is before the experiment;
- POST if time is in the 28 days after the experiment.
- For each id value, there are two observations—one with time == “PRE” and the other with time == “POST”.
days_visited: Number of days a user visited the TripAdvisor website.
easier_signup:
- TRUE if a user was exposed to the easier signup process (e.g., one-click signup) during the experiment;
- FALSE otherwise.
became_member:
- TRUE if a user became a member during the experiment period;
- FALSE otherwise.
locale_en_US:
- TRUE if a user accessed the website from the US;
- FALSE otherwise.
os_type: Windows, Mac, or Others
revenue_pre: Amount of dollars a user spent on the website before the experiment
Q5a.
Using the given data.frame, tripadvisor, create the data.frame, tripadvisor, for which
- time is a factor-type variable of time with the first level, “PRE”.
Click to Check the Answer!
<- tripadvisor |>
tripadvisor mutate(time = factor(time, levels = c("PRE", "POST")),
os_type = factor(os_type, levels = c("Windows", "Mac", "Others") ) )
Q5b.
Provide R code to recreate the ggplot figure illustrating how the relationship between time and days_visited varies by easier_signup and became_member.
- Row-wise split is based on easier_signup.
- Column-wise split is based on became_member.
Click to Check the Answer!
ggplot(tripadvisor,
aes(y = days_visited,
x = time)) +
geom_boxplot(aes(fill = time) ) +
facet_grid(easier_signup ~ became_member)
Q5c.
- Provide a comment to illustrate how the relationship between time and days_visited varies by easier_signup and became_member.
Q5d.
- Provide a R code to create the data.frame Q5d that includes the variable diff, the difference between (1) the value of days_visited for time == PRE and (2) the value of days_visited for time == POST for each id.
Click to Check the Answer!
<- tripadvisor_wide <- tripadvisor |>
q2d pivot_wider(names_from = time,
values_from = days_visited) |>
relocate(PRE, POST, .after = id) |>
mutate(diff = POST - PRE, .before = PRE) |>
select(id:became_member)
- The resulting data.frame should look as follows:
- Include only the variables as shown above.
- Adjust the order of the variables as shown above.
Q5e.
- Provide an R code to calculate how the difference in the mean value of diff varies by easier_signup and became_member using the data.frame Q5d.
Click to Check the Answer!
<- q2d |>
q2e group_by(easier_signup, became_member) |>
summarise(mean_diff = round(mean(diff),2))
- The resulting data.frame should look as follows:
Q5f.
- Using the resulting data.frame in Q5e, discuss the following question:
- What is the effect of easier signup process on the number of days a user visited the TripAdvisor website?
- How does this effect varies by the status of became_member?
Question 6
In September 2019, YouGov survey asked 1,639 GB adults the following question:
In hindsight, do you think Britain was right/wrong to vote to leave EU?
- Right to leave
- Wrong to leave
- Don’t know
The data from the survey is in brexit.csv
.
<- read_csv('https://bcdanl.github.io/data/brexit.csv') brexit
datatable(brexit)
Q6a
- Replicate the following visualization
Click to Check the Answer!
<- brexit |>
brexit mutate(
region = fct_relevel(region,
"london", "rest_of_south", "midlands_wales", "north", "scot"),
region = fct_recode(region,
London = "london",
`Rest of South` = "rest_of_south",
`Midlands / Wales` = "midlands_wales",
North = "north",
Scotland = "scot")
)
ggplot(brexit,
aes(y = opinion, fill = opinion)) +
geom_bar() +
facet_wrap( ~ region,
nrow = 1,
labeller = label_wrap_gen(width = 12)) +
guides(fill = "none") +
labs(
title = "Was Britain right/wrong to vote to leave EU?",
subtitle = "YouGov Survey Results, 2-3 September 2019",
caption = "Source: bit.ly/2lCJZVg",
x = NULL, y = NULL
+
) scale_fill_manual(values = c(
"gray",
"#67a9cf",
"#ef8a62"
+
)) theme_minimal()
Q6b
- Replicate the following visualization
- How is the story this visualization telling different than the story the plot in Q6a?
Click to Check the Answer!
ggplot(brexit,
aes(y = opinion, fill = opinion)) +
geom_bar() +
facet_wrap(~region, scales = 'free_x',
nrow = 1, labeller = label_wrap_gen(width = 12),
# ___
+
) guides(fill = "none") +
labs(
title = "Was Britain right/wrong to vote to leave EU?",
subtitle = "YouGov Survey Results, 2-3 September 2019",
caption = "Source: bit.ly/2lCJZVg",
x = NULL, y = NULL
+
) scale_fill_manual(values = c(
"Wrong" = "#ef8a62",
"Right" = "#67a9cf",
"Don't know" = "gray"
+
)) theme_minimal()
Q6c
- First, calculate the proportion of wrong, right, and don’t know answers in each region and then plot these proportions (rather than the counts) and then improve axis labeling.
Click to Check the Answer!
<- brexit |>
q6 group_by(region, opinion) |>
summarise(n = n()) |>
mutate(tot = sum(n),
prop = n / tot )
- Replicate the following visualization
- How is the story this visualization telling different than the story the plot in Q4b?
Click to Check the Answer!
ggplot(q6, aes(y = opinion, x = prop,
fill = opinion)) +
geom_col() +
facet_wrap(~region,
nrow = 1, labeller = label_wrap_gen(width = 12),
# ___
+
) guides(fill = "none") +
labs(
title = "Was Britain right/wrong to vote to leave EU?",
subtitle = "YouGov Survey Results, 2-3 September 2019",
caption = "Source: bit.ly/2lCJZVg",
x = 'Percent', y = NULL
+
) scale_fill_manual(values = c(
"Wrong" = "#ef8a62",
"Right" = "#67a9cf",
"Don't know" = "gray"
+
)) scale_x_continuous(labels = scales::percent) +
theme_minimal()
Q6d.
Recreate the same visualization from the previous exercise, this time dodging the bars for opinion proportions for each region, rather than faceting by region and then improve the legend.
- How is the story this visualization telling different than the story the previous plot tells?
Click to Check the Answer!
ggplot(q6, aes(y = region, x = prop,
fill = opinion)) +
geom_col(position = "dodge") +
labs(
title = "Was Britain right/wrong to vote to leave EU?",
subtitle = "YouGov Survey Results, 2-3 September 2019",
caption = "Source: bit.ly/2lCJZVg",
x = 'Percent', y = NULL, fill = 'Opinion'
+
) scale_fill_manual(values = c(
"Wrong" = "#ef8a62",
"Right" = "#67a9cf",
"Don't know" = "gray"
+
)) scale_x_continuous(labels = scales::percent) +
theme_minimal()
Discussion
Welcome to our Classwork 7 Discussion Board! 👋
This space is designed for you to engage with your classmates about the material covered in Classwork 7.
Whether you are looking to delve deeper into the content, share insights, or have questions about the content, this is the perfect place for you.
If you have any specific questions for Byeong-Hak (@bcdanl) regarding the Classwork 7 materials or need clarification on any points, don’t hesitate to ask here.
All comments will be stored here.
Let’s collaborate and learn from each other!