Edit the code chunks below and knit the document. You can pipe your objects to `glimpse()` or `print()` to display them.
The following data table is not tidy. Use `tibble()` or `tribble()` to manually create the tidy version of this table.
# do not edit this chunk
# `stats` packs the degrees of freedom and the t-value into a single string,
# and `conf.int` packs both interval bounds into a single string
untidy <- tribble(
~id, ~stats, ~p.value, ~conf.int,
"A", "t(26) = -0.424", 0.6749, "[-0.444, 0.292]",
"B", "t(19) = 0.754", 0.4600, "[-0.287, 0.610]",
"C", "t(19) = 4.289", 0.0004, "[ 0.374, 1.088]"
) %>% print()
## # A tibble: 3 x 4
## id stats p.value conf.int
## <chr> <chr> <dbl> <chr>
## 1 A t(26) = -0.424 0.675 [-0.444, 0.292]
## 2 B t(19) = 0.754 0.46 [-0.287, 0.610]
## 3 C t(19) = 4.289 0.0004 [ 0.374, 1.088]
# your version can have different column names in a different order
# built column by column: each variable gets its own numeric column
tidy <- tibble(
  id            = c("A", "B", "C"),
  df            = c(26, 19, 19),
  t.value       = c(-0.424, 0.754, 4.289),
  p.value       = c(0.6749, 0.4600, 0.0004),
  conf.int.low  = c(-0.444, -0.287, 0.374),
  conf.int.high = c(0.292, 0.610, 1.088)
) %>%
  print()
## # A tibble: 3 x 6
## id df t.value p.value conf.int.low conf.int.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A 26 -0.424 0.675 -0.444 0.292
## 2 B 19 0.754 0.46 -0.287 0.61
## 3 C 19 4.29 0.0004 0.374 1.09
The questions in this section all have errors. Fix the errors.
Load the dataset `reprores::sensation_seeking` as `ss`.
# has an error
# read_csv() needs a file path / URL string, raw vector or connection, but
# reprores::sensation_seeking is an object from the reprores package, not a file name
ss <- read_csv(reprores::sensation_seeking)
## Error: `file` must be a string, raw vector or a connection.
# corrects the error
# the object is taken directly from the reprores package, so no file reader is needed
ss <- reprores::sensation_seeking
## alternatively
# read_csv() is given a string (the URL of the CSV file);
# note that this assignment overwrites the `ss` created just above
ss <- read_csv("https://psyteachr.github.io/reprores/data/sensation_seeking.csv")
Convert from wide to long format.
# has an error
# no columns are passed to pivot_longer(), so there is nothing for it to reshape
ss_long <- ss %>%
pivot_longer(names_to = "question",
values_to = "score") %>%
glimpse()
## Error: `cols` must select at least one column.
# corrects the error: `cols` names the columns (sss1 to sss14) to stack
ss_long <- pivot_longer(
  data = ss,
  cols = sss1:sss14,
  names_to = "question",
  values_to = "score"
) %>%
  glimpse()
## Rows: 378,294
## Columns: 5
## $ id <dbl> 3144, 3144, 3144, 3144, 3144, 3144, 3144, 3144, 3144, 3144, 3144…
## $ user_id <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1…
## $ date <date> 2006-05-07, 2006-05-07, 2006-05-07, 2006-05-07, 2006-05-07, 200…
## $ question <chr> "sss1", "sss2", "sss3", "sss4", "sss5", "sss6", "sss7", "sss8", …
## $ score <dbl> 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1…
Convert back to wide format. Make sure `ss_wide` is the same as `ss`.
# has an error
# `question` and `score` are passed by position rather than as
# names_from / values_from, so the result is not the original wide table
ss_wide <- ss_long %>%
pivot_wider(question, score) %>%
glimpse()
## Rows: 14
## Columns: 1
## $ question <chr> "sss1", "sss2", "sss3", "sss4", "sss5", "sss6", "sss7", "sss8", …
# corrects the error: the name and value columns are given by argument name
ss_wide <- pivot_wider(
  data = ss_long,
  names_from = question,
  values_from = score
) %>%
  glimpse()
## Rows: 27,021
## Columns: 17
## $ id <dbl> 3144, 133, 175, 285, 1, 3, 4, 5, 6, 7, 8, 9, 10, 12, 11, 13, 14, …
## $ user_id <dbl> 0, 1, 2, 5, 8, 9, 10, 17, 19, 20, 21, 22, 23, 24, 27, 30, 31, 32,…
## $ date <date> 2006-05-07, 2004-12-08, 2005-01-14, 2005-02-22, 2004-09-29, 2004…
## $ sss1 <dbl> 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1,…
## $ sss2 <dbl> 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1,…
## $ sss3 <dbl> 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0,…
## $ sss4 <dbl> 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0,…
## $ sss5 <dbl> 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1,…
## $ sss6 <dbl> 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0,…
## $ sss7 <dbl> 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1,…
## $ sss8 <dbl> 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1,…
## $ sss9 <dbl> 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1,…
## $ sss10 <dbl> 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,…
## $ sss11 <dbl> 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0,…
## $ sss12 <dbl> 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0,…
## $ sss13 <dbl> 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1,…
## $ sss14 <dbl> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,…
The questions in this section all have errors. Fix the errors.
Use the `gather()` function to convert `ss` from wide to long.
# has an error
# no columns are selected for gathering, so every column (including id)
# is stacked into question/score
ss_long <- gather(ss, "question", "score") %>%
glimpse()
## Warning: attributes are not identical across measure variables;
## they will be dropped
## Rows: 459,357
## Columns: 2
## $ question <chr> "id", "id", "id", "id", "id", "id", "id", "id", "id", "id", "id"…
## $ score <dbl> 3144, 133, 175, 285, 1, 3, 4, 5, 6, 7, 8, 9, 10, 12, 11, 13, 14,…
# corrects the error: only the question columns sss1 to sss14 are gathered
ss_long <- ss %>%
  gather(key = "question", value = "score", sss1:sss14) %>%
  glimpse()
## Rows: 378,294
## Columns: 5
## $ id <dbl> 3144, 133, 175, 285, 1, 3, 4, 5, 6, 7, 8, 9, 10, 12, 11, 13, 14,…
## $ user_id <dbl> 0, 1, 2, 5, 8, 9, 10, 17, 19, 20, 21, 22, 23, 24, 27, 30, 31, 32…
## $ date <date> 2006-05-07, 2004-12-08, 2005-01-14, 2005-02-22, 2004-09-29, 200…
## $ question <chr> "sss1", "sss1", "sss1", "sss1", "sss1", "sss1", "sss1", "sss1", …
## $ score <dbl> 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1…
Split the `question` column from `ss_long` into two columns: `domain` and `qnumber`.
# has an error
# `domain` and `qnumber` are bare names, so R looks for objects called that;
# they are also passed as two separate positional arguments
ss_sep <- ss_long %>%
separate(question, domain, qnumber, sep = 3) %>%
glimpse()
## Error in str_separate(value, into = into, sep = sep, convert = convert, : object 'domain' not found
# corrects the error: the new column names are supplied as one character vector
ss_sep <- separate(
  data = ss_long,
  col = question,
  into = c("domain", "qnumber"),
  sep = 3
) %>%
  glimpse()
## Rows: 378,294
## Columns: 6
## $ id <dbl> 3144, 133, 175, 285, 1, 3, 4, 5, 6, 7, 8, 9, 10, 12, 11, 13, 14, …
## $ user_id <dbl> 0, 1, 2, 5, 8, 9, 10, 17, 19, 20, 21, 22, 23, 24, 27, 30, 31, 32,…
## $ date <date> 2006-05-07, 2004-12-08, 2005-01-14, 2005-02-22, 2004-09-29, 2004…
## $ domain <chr> "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "s…
## $ qnumber <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", …
## $ score <dbl> 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1,…
Put the `id` and `user_id` columns together into a new column named `super_id`. Make it in a format like “id-user_id”.
# has an error
# the arguments are in the wrong order: "super_id" ends up among the columns
# to combine, and no column with that name exists yet
ss_unite <- ss_sep %>%
unite(id, user_id, "super_id", sep = "-") %>%
glimpse()
## Error: Can't subset columns that don't exist.
## x Column `super_id` doesn't exist.
# corrects the error: the new column name comes first, then the columns to join
ss_unite <- unite(
  data = ss_sep,
  col = "super_id",
  id, user_id,
  sep = "-"
) %>%
  glimpse()
## Rows: 378,294
## Columns: 5
## $ super_id <chr> "3144-0", "133-1", "175-2", "285-5", "1-8", "3-9", "4-10", "5-17…
## $ date <date> 2006-05-07, 2004-12-08, 2005-01-14, 2005-02-22, 2004-09-29, 200…
## $ domain <chr> "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "…
## $ qnumber <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",…
## $ score <dbl> 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1…
Convert back to wide format. (N.B. the new question column headers will just be numbers, not “sss#”.)
# has an error
# `spreadr` is not a function R can find; the call also has a stray trailing comma
ss_wide <- ss_unite %>%
spreadr(qnumber, score, ) %>%
glimpse()
## Error in spreadr(., qnumber, score, ): could not find function "spreadr"
# corrects the error: spread() is the right function name; no trailing comma
ss_wide <- ss_unite %>%
  spread(key = qnumber, value = score) %>%
  glimpse()
## Rows: 27,021
## Columns: 17
## $ super_id <chr> "1-8", "10-23", "100-426", "10000-64553", "10001-64554", "10002-…
## $ date <date> 2004-09-29, 2004-10-08, 2004-11-25, 2007-01-25, 2007-01-25, 200…
## $ domain <chr> "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "…
## $ `1` <dbl> 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0…
## $ `10` <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0…
## $ `11` <dbl> 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0…
## $ `12` <dbl> 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0…
## $ `13` <dbl> 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0…
## $ `14` <dbl> 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0…
## $ `2` <dbl> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0…
## $ `3` <dbl> 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0…
## $ `4` <dbl> 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0…
## $ `5` <dbl> 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1…
## $ `6` <dbl> 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1…
## $ `7` <dbl> 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1…
## $ `8` <dbl> 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ `9` <dbl> 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1…
Re-write the following sequence of commands into a single ‘pipeline’.
# do not edit this chunk
# three separate assignments; each step uses the object created by the previous one
x <- 1:20 # integers from 1:20
y <- rep(x, 2) # then repeat them twice
z <- sum(y) # and then take the sum
z
## [1] 420
# the same three steps as one pipeline; the result is stored in `z`, the name
# of the final object in the step-by-step version, instead of overwriting `x`
z <- 1:20 %>%
  rep(2) %>%
  sum() %>%
  print()
## [1] 420
Deconstruct the pipeline below back into separate commands.
# do not edit this chunk
# picks five letters by position, reverses their order, joins them into one
# string, and prints the result
lager <- LETTERS[c(18, 5, 7, 1, 12)] %>%
rev() %>%
paste(collapse = "") %>%
print()
## [1] "LAGER"
# step 1: pick the 18th, 5th, 7th, 1st and 12th capital letters
regal <- LETTERS[c(18, 5, 7, 1, 12)]
# step 2: reverse their order
reversed <- rev(regal)
# step 3: join the letters into a single string
lager <- paste(reversed, collapse = "")
# step 4: display the result
print(lager)
## [1] "LAGER"
Load the dataset `reprores::family_composition`.
The columns `oldbro` through `twinsis` give the number of siblings of that age and sex. Put this into long format and create separate columns for sibling age (`sibage` = old, young, twin) and sex (`sibsex` = bro, sis).
# pivot_longer() version: the last three characters of each column name
# (bro/sis) become `sibsex`, and everything before them becomes `sibage`
family_pivot <- pivot_longer(
  data = reprores::family_composition,
  cols = oldbro:twinsis,
  names_to = c("sibage", "sibsex"),
  names_sep = -3,
  values_to = "n"
) %>%
  glimpse()
## Rows: 115,014
## Columns: 8
## $ user_id <dbl> 8, 8, 8, 8, 8, 8, 67, 67, 67, 67, 67, 67, 98, 98, 98, 98, 98, 98,…
## $ sex <chr> "male", "male", "male", "male", "male", "male", "female", "female…
## $ age <dbl> 38.1, 38.1, 38.1, 38.1, 38.1, 38.1, 19.7, 19.7, 19.7, 19.7, 19.7,…
## $ momage <dbl> 25, 25, 25, 25, 25, 25, 29, 29, 29, 29, 29, 29, NA, NA, NA, NA, N…
## $ dadage <dbl> 27, 27, 27, 27, 27, 27, 31, 31, 31, 31, 31, 31, NA, NA, NA, NA, N…
## $ sibage <chr> "old", "old", "young", "young", "twin", "twin", "old", "old", "yo…
## $ sibsex <chr> "bro", "sis", "bro", "sis", "bro", "sis", "bro", "sis", "bro", "s…
## $ n <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 2, 0, 0, 0,…
# gather() + separate() version: stack the sibling columns first, then split
# the stacked column name three characters from its end
family_tidy <- reprores::family_composition %>%
  gather(key = "sibtype", value = "n", oldbro:twinsis) %>%
  separate(
    col = sibtype,
    into = c("sibage", "sibsex"),
    sep = -3
  ) %>%
  glimpse()
## Rows: 115,014
## Columns: 8
## $ user_id <dbl> 8, 67, 98, 103, 164, 233, 235, 253, 256, 271, 298, 332, 426, 429,…
## $ sex <chr> "male", "female", "female", "female", "female", "female", "male",…
## $ age <dbl> 38.1, 19.7, 19.4, 20.6, 20.3, 19.3, 18.7, 19.5, 19.7, 24.5, 17.7,…
## $ momage <dbl> 25, 29, NA, NA, 24, NA, NA, 24, NA, 21, 28, NA, NA, NA, NA, NA, N…
## $ dadage <dbl> 27, 31, NA, NA, NA, NA, NA, 25, NA, 22, NA, NA, NA, NA, NA, NA, N…
## $ sibage <chr> "old", "old", "old", "old", "old", "old", "old", "old", "old", "o…
## $ sibsex <chr> "bro", "bro", "bro", "bro", "bro", "bro", "bro", "bro", "bro", "b…
## $ n <dbl> 0, 1, 1, 2, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 2, 0, 1, 0, 0, 0, 0,…
Tidy the data from `reprores::eye_descriptions`. This dataset contains descriptions of the eyes of 50 people by 220 raters (`user_id`). Some raters wrote more than one description per face (maximum 4), separated by commas, semicolons, or slashes.
Create a dataset with separate columns for `face_id`, `description`, and description number (`desc_n`).
Hint: to separate a string by tildes or commas, you would set the `sep` argument to `"(~|,)+"`.
# 1. stack the face columns t1 to t50 into face_id/description
# 2. split each description on runs of commas, semicolons or slashes into up
#    to four pieces, padding missing pieces on the right with NA
# 3. stack the pieces d1 to d4 into desc_n/description
# 4. drop the rows that have no description
eyes <- reprores::eye_descriptions %>%
  gather(key = "face_id", value = "description", t1:t50) %>%
  separate(
    col = description,
    into = c("d1", "d2", "d3", "d4"),
    sep = "(,|;|\\/)+",
    fill = "right"
  ) %>%
  gather(key = "desc_n", value = "description", d1:d4) %>%
  filter(!is.na(description)) %>%
  glimpse()
## Rows: 12,304
## Columns: 6
## $ user_id <dbl> 508844, 508966, 508976, 509196, 509286, 509400, 509503, 50966…
## $ sex <chr> "male", "female", "female", "female", "female", "male", "fema…
## $ age <dbl> 19.0, 20.4, 24.8, 14.6, 16.7, NA, 36.2, 20.7, 5.0, 18.2, 17.4…
## $ face_id <chr> "t1", "t1", "t1", "t1", "t1", "t1", "t1", "t1", "t1", "t1", "…
## $ desc_n <chr> "d1", "d1", "d1", "d1", "d1", "d1", "d1", "d1", "d1", "d1", "…
## $ description <chr> "empty", "bored", "Dark high on drugs", "soft brown", "brown"…