Edit the code chunks below and knit the document. You can pipe your objects to `glimpse()` or `print()` to display them.
The following data table is not tidy. Use `tibble()` or `tribble()` to manually create the tidy version of this table.
# do not edit this chunk
# Untidy on purpose: `stats` packs two values (the number in parentheses and
# the t statistic) into one string, and `conf.int` packs both interval bounds
# into one string.
untidy <- tribble(
~id, ~stats, ~p.value, ~conf.int,
"A", "t(26) = -0.424", 0.6749, "[-0.444, 0.292]",
"B", "t(19) = 0.754", 0.4600, "[-0.287, 0.610]",
"C", "t(19) = 4.289", 0.0004, "[ 0.374, 1.088]"
) %>% print()
## # A tibble: 3 x 4
## id stats p.value conf.int
## <chr> <chr> <dbl> <chr>
## 1 A t(26) = -0.424 0.675 [-0.444, 0.292]
## 2 B t(19) = 0.754 0.46 [-0.287, 0.610]
## 3 C t(19) = 4.289 0.0004 [ 0.374, 1.088]
# your version can have different column names in a different order
# Built column by column: each value that was packed into `stats` or
# `conf.int` gets its own numeric column.
tidy <- tibble(
  id            = c("A", "B", "C"),
  df            = c(26, 19, 19),
  t.value       = c(-0.424, 0.754, 4.289),
  p.value       = c(0.6749, 0.4600, 0.0004),
  conf.int.low  = c(-0.444, -0.287, 0.374),
  conf.int.high = c(0.292, 0.610, 1.088)
)
print(tidy)
## # A tibble: 3 x 6
## id df t.value p.value conf.int.low conf.int.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A 26 -0.424 0.675 -0.444 0.292
## 2 B 19 0.754 0.46 -0.287 0.61
## 3 C 19 4.29 0.0004 0.374 1.09
The questions in this section all have errors. Fix the errors.
Load the dataset `dataskills::sensation_seeking` as `ss`.
# Exercise version (errors): read_csv() is handed the dataset object itself
# rather than a file path or connection.
ss <- read_csv(dataskills::sensation_seeking)
## Error: `file` must be a string, raw vector or a connection.
# Fix: the dataset is an object in the dataskills package, so assign it directly.
ss <- dataskills::sensation_seeking
## alternatively, read the CSV file from its URL
## (this second assignment overwrites the first; presumably the same data)
ss <- read_csv("https://psyteachr.github.io/msc-data-skills/data/sensation_seeking.csv")
Convert from wide to long format.
# Exercise version (errors): no columns are selected for pivoting.
ss_long <- ss %>%
pivot_longer(names_to = "question",
values_to = "score") %>%
glimpse()
## Error: `cols` must select at least one column.
# Fix: tell pivot_longer() which columns to stack (sss1 through sss14).
ss_long <- pivot_longer(
  ss,
  cols = sss1:sss14,
  names_to = "question",
  values_to = "score"
)
glimpse(ss_long)
## Rows: 378,294
## Columns: 5
## $ id <dbl> 3144, 3144, 3144, 3144, 3144, 3144, 3144,…
## $ user_id <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ date <date> 2006-05-07, 2006-05-07, 2006-05-07, 2006…
## $ question <chr> "sss1", "sss2", "sss3", "sss4", "sss5", "…
## $ score <dbl> 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0,…
Convert back to wide format. Make sure `ss_wide` is the same as `ss`.
# Exercise version (wrong result): `question` and `score` are passed
# positionally instead of as `names_from` / `values_from`; the recorded output
# has 14 rows and only the `question` column.
ss_wide <- ss_long %>%
pivot_wider(question, score) %>%
glimpse()
## Rows: 14
## Columns: 1
## $ question <chr> "sss1", "sss2", "sss3", "sss4", "sss5", "…
# Fix: question labels become the column names, scores become the cell values.
ss_wide <- pivot_wider(
  ss_long,
  names_from = question,
  values_from = score
)
glimpse(ss_wide)
## Rows: 27,021
## Columns: 17
## $ id <dbl> 3144, 133, 175, 285, 1, 3, 4, 5, 6, 7, 8, …
## $ user_id <dbl> 0, 1, 2, 5, 8, 9, 10, 17, 19, 20, 21, 22, …
## $ date <date> 2006-05-07, 2004-12-08, 2005-01-14, 2005-…
## $ sss1 <dbl> 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, …
## $ sss2 <dbl> 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, …
## $ sss3 <dbl> 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, …
## $ sss4 <dbl> 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, …
## $ sss5 <dbl> 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, …
## $ sss6 <dbl> 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, …
## $ sss7 <dbl> 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, …
## $ sss8 <dbl> 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, …
## $ sss9 <dbl> 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, …
## $ sss10 <dbl> 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, …
## $ sss11 <dbl> 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, …
## $ sss12 <dbl> 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, …
## $ sss13 <dbl> 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, …
## $ sss14 <dbl> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, …
The questions in this section all have errors. Fix the errors.
Use the `gather()` function to convert `ss` from wide to long.
# Exercise version (wrong result): no columns are specified, so every column
# is gathered -- the recorded output has only two columns, with the id values
# ending up in `score`.
ss_long <- gather(ss, "question", "score") %>%
glimpse()
## Warning: attributes are not identical across measure variables;
## they will be dropped
## Rows: 459,357
## Columns: 2
## $ question <chr> "id", "id", "id", "id", "id", "id", "id",…
## $ score <dbl> 3144, 133, 175, 285, 1, 3, 4, 5, 6, 7, 8,…
# Fix: gather only the question columns sss1 through sss14.
ss_long <- ss %>%
  gather(key = "question", value = "score", sss1:sss14) %>%
  glimpse()
## Rows: 378,294
## Columns: 5
## $ id <dbl> 3144, 133, 175, 285, 1, 3, 4, 5, 6, 7, 8,…
## $ user_id <dbl> 0, 1, 2, 5, 8, 9, 10, 17, 19, 20, 21, 22,…
## $ date <date> 2006-05-07, 2004-12-08, 2005-01-14, 2005…
## $ question <chr> "sss1", "sss1", "sss1", "sss1", "sss1", "…
## $ score <dbl> 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,…
Split the `question` column from `ss_long` into two columns: `domain` and `qnumber`.
# Exercise version (errors): the new column names are given as bare symbols
# instead of a character vector.
ss_sep <- ss_long %>%
separate(question, domain, qnumber, sep = 3) %>%
glimpse()
## Error in str_separate(value, into = into, sep = sep, convert = convert, : object 'domain' not found
# Fix: give the new names as a character vector. sep = 3 splits after the
# third character, e.g. "sss1" -> "sss" and "1".
ss_sep <- separate(
  ss_long,
  col = question,
  into = c("domain", "qnumber"),
  sep = 3
)
glimpse(ss_sep)
## Rows: 378,294
## Columns: 6
## $ id <dbl> 3144, 133, 175, 285, 1, 3, 4, 5, 6, 7, 8, …
## $ user_id <dbl> 0, 1, 2, 5, 8, 9, 10, 17, 19, 20, 21, 22, …
## $ date <date> 2006-05-07, 2004-12-08, 2005-01-14, 2005-…
## $ domain <chr> "sss", "sss", "sss", "sss", "sss", "sss", …
## $ qnumber <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1…
## $ score <dbl> 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, …
Put the `id` and `user_id` columns together into a new column named `super_id`. Make it in a format like “id-user_id”.
# Exercise version (errors): the arguments are in the wrong order -- the name
# of the new column has to come before the columns being combined.
ss_unite <- ss_sep %>%
unite(id, user_id, "super_id", sep = "-") %>%
glimpse()
## Error: Can't subset columns that don't exist.
## x Column `super_id` doesn't exist.
# Fix: new column name first, then the columns to join with "-".
ss_unite <- unite(ss_sep, col = "super_id", id, user_id, sep = "-")
glimpse(ss_unite)
## Rows: 378,294
## Columns: 5
## $ super_id <chr> "3144-0", "133-1", "175-2", "285-5", "1-8…
## $ date <date> 2006-05-07, 2004-12-08, 2005-01-14, 2005…
## $ domain <chr> "sss", "sss", "sss", "sss", "sss", "sss",…
## $ qnumber <chr> "1", "1", "1", "1", "1", "1", "1", "1", "…
## $ score <dbl> 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,…
Convert back to wide format. (N.B. the new question column headers will just be numbers, not “sss#”)
# Exercise version (errors): `spreadr` is not a function (the fix uses
# `spread`); the argument list also ends with a stray comma.
ss_wide <- ss_unite %>%
spreadr(qnumber, score, ) %>%
glimpse()
## Error in spreadr(., qnumber, score, ): could not find function "spreadr"
# Fix: spread() with qnumber as the key (new column names) and score as the
# value. qnumber is character, so the new columns come out in text order
# (1, 10, 11, ...).
ss_wide <- ss_unite %>%
  spread(key = qnumber, value = score) %>%
  glimpse()
## Rows: 27,021
## Columns: 17
## $ super_id <chr> "1-8", "10-23", "100-426", "10000-64553",…
## $ date <date> 2004-09-29, 2004-10-08, 2004-11-25, 2007…
## $ domain <chr> "sss", "sss", "sss", "sss", "sss", "sss",…
## $ `1` <dbl> 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,…
## $ `10` <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,…
## $ `11` <dbl> 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1,…
## $ `12` <dbl> 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0,…
## $ `13` <dbl> 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0,…
## $ `14` <dbl> 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,…
## $ `2` <dbl> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0,…
## $ `3` <dbl> 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0,…
## $ `4` <dbl> 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0,…
## $ `5` <dbl> 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0,…
## $ `6` <dbl> 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0,…
## $ `7` <dbl> 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0,…
## $ `8` <dbl> 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1,…
## $ `9` <dbl> 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1,…
Re-write the following sequence of commands into a single ‘pipeline’.
# do not edit this chunk
# Step-by-step version that the exercise asks to rewrite as a single pipeline.
x <- 1:20 # integers from 1:20
y <- rep(x, 2) # then repeat them twice
z <- sum(y) # and then take the sum
z
## [1] 420
# The same three steps as one pipeline. The result is stored in `z`, as in the
# step-by-step version; assigning it to `x` would overwrite the 1:20 sequence
# with the sum.
z <- 1:20 %>%
  rep(2) %>%
  sum() %>%
  print()
## [1] 420
Deconstruct the pipeline below back into separate commands.
# do not edit this chunk
# LETTERS[c(18, 5, 7, 1, 12)] picks "R" "E" "G" "A" "L"; the pipeline then
# reverses the letters, collapses them into one string and prints it.
lager <- LETTERS[c(18, 5, 7, 1, 12)] %>%
rev() %>%
paste(collapse = "") %>%
print()
## [1] "LAGER"
# Step 1: pick the letters by their position in the alphabet
regal <- LETTERS[c(18, 5, 7, 1, 12)]
# Step 2: reverse their order
reversed <- rev(regal)
# Step 3: glue the letters together into a single string
lager <- paste0(reversed, collapse = "")
print(lager)
## [1] "LAGER"
Load the dataset `dataskills::family_composition`.
The columns `oldbro` through `twinsis` give the number of siblings of that age and sex. Put this into long format and create separate columns for sibling age (`sibage` = old, young, twin) and sex (`sibsex` = bro, sis).
# Stack the sibling-count columns and split each column name into age and sex
# in one step. names_sep = -3 splits three characters from the end,
# e.g. "oldbro" -> "old" and "bro".
family_pivot <- pivot_longer(
  dataskills::family_composition,
  cols = oldbro:twinsis,
  names_to = c("sibage", "sibsex"),
  names_sep = -3,
  values_to = "n"
)
glimpse(family_pivot)
## Rows: 115,014
## Columns: 8
## $ user_id <dbl> 8, 8, 8, 8, 8, 8, 67, 67, 67, 67, 67, 67, …
## $ sex <chr> "male", "male", "male", "male", "male", "m…
## $ age <dbl> 38.1, 38.1, 38.1, 38.1, 38.1, 38.1, 19.7, …
## $ momage <dbl> 25, 25, 25, 25, 25, 25, 29, 29, 29, 29, 29…
## $ dadage <dbl> 27, 27, 27, 27, 27, 27, 31, 31, 31, 31, 31…
## $ sibage <chr> "old", "old", "young", "young", "twin", "t…
## $ sibsex <chr> "bro", "sis", "bro", "sis", "bro", "sis", …
## $ n <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, …
# Alternative with gather() + separate(): stack the sibling columns first,
# then split the column name three characters from the end.
family_tidy <- dataskills::family_composition %>%
  gather(key = "sibtype", value = "n", oldbro:twinsis) %>%
  separate(col = sibtype, into = c("sibage", "sibsex"), sep = -3) %>%
  glimpse()
## Rows: 115,014
## Columns: 8
## $ user_id <dbl> 8, 67, 98, 103, 164, 233, 235, 253, 256, 2…
## $ sex <chr> "male", "female", "female", "female", "fem…
## $ age <dbl> 38.1, 19.7, 19.4, 20.6, 20.3, 19.3, 18.7, …
## $ momage <dbl> 25, 29, NA, NA, 24, NA, NA, 24, NA, 21, 28…
## $ dadage <dbl> 27, 31, NA, NA, NA, NA, NA, 25, NA, 22, NA…
## $ sibage <chr> "old", "old", "old", "old", "old", "old", …
## $ sibsex <chr> "bro", "bro", "bro", "bro", "bro", "bro", …
## $ n <dbl> 0, 1, 1, 2, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, …
Tidy the data from `dataskills::eye_descriptions`. This dataset contains descriptions of the eyes of 50 people by 220 raters (`user_id`). Some raters wrote more than one description per face (maximum 4), separated by commas, semicolons, or slashes.
Create a dataset with separate columns for `face_id`, `description`, and description number (`desc_n`).
Hint: to separate a string by tildes or commas, you would set the `sep` argument to `"(~|,)+"`.
eyes <- dataskills::eye_descriptions %>%
  # stack the face columns t1..t50 into face_id / description pairs
  gather(key = "face_id", value = "description", t1:t50) %>%
  # split on runs of commas, semicolons or slashes into up to four pieces;
  # fill = "right" pads answers with fewer pieces with NA
  separate(
    col = description,
    into = c("d1", "d2", "d3", "d4"),
    sep = "(,|;|\\/)+",
    fill = "right"
  ) %>%
  # stack the pieces again, recording which piece each one is in desc_n
  gather(key = "desc_n", value = "description", d1:d4) %>%
  # gets rid of rows with no description
  filter(!is.na(description)) %>%
  glimpse()
## Rows: 12,304
## Columns: 6
## $ user_id <dbl> 508844, 508966, 508976, 509196, 509286…
## $ sex <chr> "male", "female", "female", "female", …
## $ age <dbl> 19.0, 20.4, 24.8, 14.6, 16.7, NA, 36.2…
## $ face_id <chr> "t1", "t1", "t1", "t1", "t1", "t1", "t…
## $ desc_n <chr> "d1", "d1", "d1", "d1", "d1", "d1", "d…
## $ description <chr> "empty", "bored", "Dark high on drugs"…