Edit the code chunks below and knit the document. You can pipe your objects to `glimpse()` or `print()` to display them.
The following data table is not tidy. Use `tibble()` or `tribble()` to manually create the tidy version of this table.
# do not edit this chunk
# Untidy on purpose: `stats` packs the degrees of freedom and the t-value into
# one string, and `conf.int` packs both interval bounds into one string.
untidy <- tribble(
~id, ~stats, ~p.value, ~conf.int,
"A", "t(26) = -0.424", 0.6749, "[-0.444, 0.292]",
"B", "t(19) = 0.754", 0.4600, "[-0.287, 0.610]",
"C", "t(19) = 4.289", 0.0004, "[ 0.374, 1.088]"
) |> print()
## # A tibble: 3 × 4
## id stats p.value conf.int
## <chr> <chr> <dbl> <chr>
## 1 A t(26) = -0.424 0.675 [-0.444, 0.292]
## 2 B t(19) = 0.754 0.46 [-0.287, 0.610]
## 3 C t(19) = 4.289 0.0004 [ 0.374, 1.088]
# your version can have different column names in a different order
# Built column-wise: every variable gets its own numeric column, so each cell
# holds exactly one value.
tidy <- tibble(
  id            = c("A", "B", "C"),
  df            = c(26, 19, 19),
  t.value       = c(-0.424, 0.754, 4.289),
  p.value       = c(0.6749, 0.4600, 0.0004),
  conf.int.low  = c(-0.444, -0.287, 0.374),
  conf.int.high = c(0.292, 0.610, 1.088)
) |>
  print()
## # A tibble: 3 × 6
## id df t.value p.value conf.int.low conf.int.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A 26 -0.424 0.675 -0.444 0.292
## 2 B 19 0.754 0.46 -0.287 0.61
## 3 C 19 4.29 0.0004 0.374 1.09
The questions in this section all have errors. Fix the errors.
Load the dataset `reprores::sensation_seeking` as `ss`.
# has an error
# read_csv() is handed the dataset object itself rather than a file path or
# URL, which is what triggers the "invalid connection" error.
ss <- read_csv(reprores::sensation_seeking)
## Error in (function (con, rw = "") : invalid connection
# corrects the error
# the packaged dataset is already an object, so it can be assigned directly
ss <- reprores::sensation_seeking
## alternatively
# read the dataset from the hosted CSV file instead; if both lines are run,
# this assignment replaces the one above
ss <- read_csv("https://psyteachr.github.io/reprores/data/sensation_seeking.csv")
Convert from wide to long format.
# has an error
# no columns are selected for pivoting: the `cols` argument is missing
ss_long <- ss |>
pivot_longer(names_to = "question",
values_to = "score") |>
glimpse()
## Error in `build_longer_spec()`:
## ! `cols` must select at least one column.
# corrects the error
# Stack the question columns sss1..sss14 into name/value pairs; the remaining
# columns are kept as identifiers and repeated on every row.
ss_long <- ss |>
  pivot_longer(
    cols      = sss1:sss14,
    names_to  = "question",
    values_to = "score"
  ) |>
  glimpse()
## Rows: 378,294
## Columns: 5
## $ id <dbl> 3144, 3144, 3144, 3144, 3144, 3144, 3144, 3144, 3144, 3144, 3144, 3144, 3144, 3144, 1…
## $ user_id <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2…
## $ date <date> 2006-05-07, 2006-05-07, 2006-05-07, 2006-05-07, 2006-05-07, 2006-05-07, 2006-05-07, …
## $ question <chr> "sss1", "sss2", "sss3", "sss4", "sss5", "sss6", "sss7", "sss8", "sss9", "sss10", "sss…
## $ score <dbl> 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0…
Convert back to wide format. Make sure `ss_wide` is the same as `ss`.
# has an error
# `question` and `score` are passed positionally, so they are not matched to
# the `names_from` and `values_from` arguments of pivot_wider()
ss_wide <- ss_long |>
pivot_wider(question, score) |>
glimpse()
## Error in `chr_as_locations()`:
## ! Can't subset columns that don't exist.
## ✖ Columns `white`, `aliceblue`, `antiquewhite`, `antiquewhite1`, `antiquewhite2`, etc. don't exist.
# corrects the error
# One column per distinct `question`, filled with the matching `score`.
ss_wide <- ss_long |>
  pivot_wider(
    names_from  = question,
    values_from = score
  ) |>
  glimpse()
## Rows: 27,021
## Columns: 17
## $ id <dbl> 3144, 133, 175, 285, 1, 3, 4, 5, 6, 7, 8, 9, 10, 12, 11, 13, 14, 15, 16, 17, 18, 19, 2…
## $ user_id <dbl> 0, 1, 2, 5, 8, 9, 10, 17, 19, 20, 21, 22, 23, 24, 27, 30, 31, 32, 33, 34, 35, 37, 47, …
## $ date <date> 2006-05-07, 2004-12-08, 2005-01-14, 2005-02-22, 2004-09-29, 2004-09-30, 2004-10-01, 2…
## $ sss1 <dbl> 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0,…
## $ sss2 <dbl> 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0,…
## $ sss3 <dbl> 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0,…
## $ sss4 <dbl> 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ sss5 <dbl> 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0,…
## $ sss6 <dbl> 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,…
## $ sss7 <dbl> 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0,…
## $ sss8 <dbl> 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0,…
## $ sss9 <dbl> 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0,…
## $ sss10 <dbl> 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0,…
## $ sss11 <dbl> 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0,…
## $ sss12 <dbl> 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0,…
## $ sss13 <dbl> 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1,…
## $ sss14 <dbl> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0,…
The questions in this section all have errors. Fix the errors.
Use the `gather()` function to convert `ss` from wide to long.
# has an error
# no columns are specified, so gather() stacks every column (including `id`)
# into question/score pairs
ss_long <- gather(ss, "question", "score") |>
glimpse()
## Rows: 459,357
## Columns: 2
## $ question <chr> "id", "id", "id", "id", "id", "id", "id", "id", "id", "id", "id", "id", "id", "id", "…
## $ score <dbl> 3144, 133, 175, 285, 1, 3, 4, 5, 6, 7, 8, 9, 10, 12, 11, 13, 14, 15, 16, 17, 18, 19, …
# corrects the error
# Restrict gathering to the question columns so the other columns are kept
# as identifiers.
ss_long <- ss |>
  gather(key = "question", value = "score", sss1:sss14) |>
  glimpse()
## Rows: 378,294
## Columns: 5
## $ id <dbl> 3144, 133, 175, 285, 1, 3, 4, 5, 6, 7, 8, 9, 10, 12, 11, 13, 14, 15, 16, 17, 18, 19, …
## $ user_id <dbl> 0, 1, 2, 5, 8, 9, 10, 17, 19, 20, 21, 22, 23, 24, 27, 30, 31, 32, 33, 34, 35, 37, 47,…
## $ date <date> 2006-05-07, 2004-12-08, 2005-01-14, 2005-02-22, 2004-09-29, 2004-09-30, 2004-10-01, …
## $ question <chr> "sss1", "sss1", "sss1", "sss1", "sss1", "sss1", "sss1", "sss1", "sss1", "sss1", "sss1…
## $ score <dbl> 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0…
Split the `question` column from `ss_long` into two columns: `domain` and `qnumber`.
# has an error
# the new column names are given as bare, unquoted names instead of a single
# character vector, so R looks for an object called `domain`
ss_sep <- ss_long |>
separate(question, domain, qnumber, sep = 3) |>
glimpse()
## Error in str_separate(value, into = into, sep = sep, convert = convert, : object 'domain' not found
# corrects the error
# sep = 3 splits by position: the first three characters ("sss") become
# `domain`, the rest becomes `qnumber`.
ss_sep <- ss_long |>
  separate(
    col  = question,
    into = c("domain", "qnumber"),
    sep  = 3
  ) |>
  glimpse()
## Rows: 378,294
## Columns: 6
## $ id <dbl> 3144, 133, 175, 285, 1, 3, 4, 5, 6, 7, 8, 9, 10, 12, 11, 13, 14, 15, 16, 17, 18, 19, 2…
## $ user_id <dbl> 0, 1, 2, 5, 8, 9, 10, 17, 19, 20, 21, 22, 23, 24, 27, 30, 31, 32, 33, 34, 35, 37, 47, …
## $ date <date> 2006-05-07, 2004-12-08, 2005-01-14, 2005-02-22, 2004-09-29, 2004-09-30, 2004-10-01, 2…
## $ domain <chr> "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "s…
## $ qnumber <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "…
## $ score <dbl> 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0,…
Put the `id` and `user_id` columns together into a new column named `super_id`. Make it in a format like “id-user_id”.
# has an error
# the name of the new column must come first; here "super_id" is listed after
# the columns to combine, so it is looked up as an existing column
ss_unite <- ss_sep |>
unite(id, user_id, "super_id", sep = "-") |>
glimpse()
## Error in `chr_as_locations()`:
## ! Can't subset columns that don't exist.
## ✖ Column `super_id` doesn't exist.
# corrects the error
# New column name first, then the columns to paste together with "-".
ss_unite <- ss_sep |>
  unite(col = "super_id", id, user_id, sep = "-") |>
  glimpse()
## Rows: 378,294
## Columns: 5
## $ super_id <chr> "3144-0", "133-1", "175-2", "285-5", "1-8", "3-9", "4-10", "5-17", "6-19", "7-20", "8…
## $ date <date> 2006-05-07, 2004-12-08, 2005-01-14, 2005-02-22, 2004-09-29, 2004-09-30, 2004-10-01, …
## $ domain <chr> "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "…
## $ qnumber <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", …
## $ score <dbl> 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0…
Convert back to wide format. (N.B. the new question columns headers will just be numbers, not “sss#”)
# has an error
# the function name is misspelled (`spreadr` cannot be found) and the call
# ends with a stray trailing comma
ss_wide <- ss_unite |>
spreadr(qnumber, score, ) |>
glimpse()
## Error in spreadr(ss_unite, qnumber, score, ): could not find function "spreadr"
# corrects the error
# One column per question number, filled with the matching score.
ss_wide <- ss_unite |>
  spread(key = qnumber, value = score) |>
  glimpse()
## Rows: 27,021
## Columns: 17
## $ super_id <chr> "1-8", "10-23", "100-426", "10000-64553", "10001-64554", "10002-64555", "10003-62492"…
## $ date <date> 2004-09-29, 2004-10-08, 2004-11-25, 2007-01-25, 2007-01-25, 2007-01-25, 2007-01-25, …
## $ domain <chr> "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "sss", "…
## $ `1` <dbl> 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1…
## $ `10` <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1…
## $ `11` <dbl> 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1…
## $ `12` <dbl> 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1…
## $ `13` <dbl> 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1…
## $ `14` <dbl> 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ `2` <dbl> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1…
## $ `3` <dbl> 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0…
## $ `4` <dbl> 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `5` <dbl> 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ `6` <dbl> 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1…
## $ `7` <dbl> 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1…
## $ `8` <dbl> 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1…
## $ `9` <dbl> 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
Re-write the following sequence of commands into a single ‘pipeline’.
# do not edit this chunk
# Three separate commands, each storing its result in its own variable.
x <- 1:20 # integers from 1:20
y <- rep(x, 2) # then repeat them twice
z <- sum(y) # and then take the sum
z
## [1] 420
# The three steps from the chunk above, chained into a single pipeline.
x <- 1:20 |> # integers from 1:20
  rep(2) |>  # then repeat them twice
  sum() |>   # and then take the sum
  print()
## [1] 420
Deconstruct the pipeline below back into separate commands.
# do not edit this chunk
# Pick five letters by alphabet position, reverse their order, and join them
# into one string.
lager <- LETTERS[c(18, 5, 7, 1, 12)] |>
rev() |>
paste(collapse = "") |>
print()
## [1] "LAGER"
# Each pipeline step as its own command, with a named intermediate result.
letter_positions <- c(18, 5, 7, 1, 12)   # positions in the alphabet
regal <- LETTERS[letter_positions]       # pick the letters
reversed <- rev(regal)                   # reverse their order
lager <- paste(reversed, collapse = "")  # make it into a string
print(lager)
## [1] "LAGER"
Load the dataset `reprores::family_composition`. The columns `oldbro` through `twinsis` give the number of siblings of that age and sex. Put this into long format and create separate columns for sibling age (`sibage` = old, young, twin) and sex (`sibsex` = bro, sis).
# Pivot the sibling-count columns to long format and split each column name
# into age and sex in the same step.
family_pivot <- reprores::family_composition |>
  pivot_longer(
    cols      = oldbro:twinsis,
    names_to  = c("sibage", "sibsex"),
    names_sep = -3,  # split 3 characters from the end: "old" + "bro"
    values_to = "n"
  ) |>
  glimpse()
## Rows: 115,014
## Columns: 8
## $ user_id <dbl> 8, 8, 8, 8, 8, 8, 67, 67, 67, 67, 67, 67, 98, 98, 98, 98, 98, 98, 103, 103, 103, 103, …
## $ sex <chr> "male", "male", "male", "male", "male", "male", "female", "female", "female", "female"…
## $ age <dbl> 38.1, 38.1, 38.1, 38.1, 38.1, 38.1, 19.7, 19.7, 19.7, 19.7, 19.7, 19.7, 19.4, 19.4, 19…
## $ momage <dbl> 25, 25, 25, 25, 25, 25, 29, 29, 29, 29, 29, 29, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ dadage <dbl> 27, 27, 27, 27, 27, 27, 31, 31, 31, 31, 31, 31, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ sibage <chr> "old", "old", "young", "young", "twin", "twin", "old", "old", "young", "young", "twin"…
## $ sibsex <chr> "bro", "sis", "bro", "sis", "bro", "sis", "bro", "sis", "bro", "sis", "bro", "sis", "b…
## $ n <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
# Same reshaping in two steps: gather the sibling columns, then split the
# gathered column name into age and sex.
family_tidy <- reprores::family_composition |>
  gather(key = "sibtype", value = "n", oldbro:twinsis) |>
  separate(
    col  = sibtype,
    into = c("sibage", "sibsex"),
    sep  = -3  # split 3 characters from the end: "old" + "bro"
  ) |>
  glimpse()
## Rows: 115,014
## Columns: 8
## $ user_id <dbl> 8, 67, 98, 103, 164, 233, 235, 253, 256, 271, 298, 332, 426, 429, 434, 436, 450, 452, …
## $ sex <chr> "male", "female", "female", "female", "female", "female", "male", "female", "female", …
## $ age <dbl> 38.1, 19.7, 19.4, 20.6, 20.3, 19.3, 18.7, 19.5, 19.7, 24.5, 17.7, 19.6, 19.2, 19.8, 18…
## $ momage <dbl> 25, 29, NA, NA, 24, NA, NA, 24, NA, 21, 28, NA, NA, NA, NA, NA, NA, NA, 26, NA, 27, NA…
## $ dadage <dbl> 27, 31, NA, NA, NA, NA, NA, 25, NA, 22, NA, NA, NA, NA, NA, NA, NA, NA, 30, NA, 34, NA…
## $ sibage <chr> "old", "old", "old", "old", "old", "old", "old", "old", "old", "old", "old", "old", "o…
## $ sibsex <chr> "bro", "bro", "bro", "bro", "bro", "bro", "bro", "bro", "bro", "bro", "bro", "bro", "b…
## $ n <dbl> 0, 1, 1, 2, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,…
Tidy the data from `reprores::eye_descriptions`. This dataset contains descriptions of the eyes of 50 people by 220 raters (`user_id`). Some raters wrote more than one description per face (maximum 4), separated by commas, semicolons, or slashes.
Create a dataset with separate columns for `face_id`, `description`, and description number (`desc_n`).
Hint: to separate a string by tildes or commas, you would set the `sep` argument to `"(~|,)+"`.
# Reshape the face columns to long format, split multi-part descriptions into
# up to four pieces, then stack the pieces so each row holds one description.
eyes <- reprores::eye_descriptions |>
  # one row per rater and face
  gather(key = "face_id", value = "description", t1:t50) |>
  # split on runs of commas, semicolons or slashes; fill = "right" pads
  # missing pieces with NA instead of warning
  separate(
    col  = description,
    into = c("d1", "d2", "d3", "d4"),
    sep  = "(,|;|\\/)+",
    fill = "right"
  ) |>
  # one row per description piece, labelled d1..d4
  gather(key = "desc_n", value = "description", d1:d4) |>
  # gets rid of rows with no description
  filter(!is.na(description)) |>
  glimpse()
## Rows: 12,304
## Columns: 6
## $ user_id <dbl> 508844, 508966, 508976, 509196, 509286, 509400, 509503, 509665, 509787, 509848, 51…
## $ sex <chr> "male", "female", "female", "female", "female", "male", "female", "female", "femal…
## $ age <dbl> 19.0, 20.4, 24.8, 14.6, 16.7, NA, 36.2, 20.7, 5.0, 18.2, 17.4, 18.8, 34.5, 18.4, 1…
## $ face_id <chr> "t1", "t1", "t1", "t1", "t1", "t1", "t1", "t1", "t1", "t1", "t1", "t1", "t1", "t1"…
## $ desc_n <chr> "d1", "d1", "d1", "d1", "d1", "d1", "d1", "d1", "d1", "d1", "d1", "d1", "d1", "d1"…
## $ description <chr> "empty", "bored", "Dark high on drugs", "soft brown", "brown", "wide", "brown tire…