Edit the code chunks below and knit the document. You can pipe your
objects to `glimpse()` or `print()` to display them.
Load the following data from the reprores package (or access the
linked CSV files online). Each participant is identified by a unique
`user_id`.
# Option 1: load the three tables by name with data().
data(list = c("disgust_scores", "personality_scores", "users"))
# or
# Option 2: read the same-named tables from the hosted CSV files.
# As written, both options run in sequence, so these assignments replace
# the objects loaded by data() above.
disgust_scores <- read_csv(
  file = "https://psyteachr.github.io/reprores/data/disgust_scores.csv"
)
personality_scores <- read_csv(
  file = "https://psyteachr.github.io/reprores/data/personality_scores.csv"
)
users <- read_csv(
  file = "https://psyteachr.github.io/reprores/data/users.csv"
)
Add the `users` data to the `disgust_scores` table.
# Keep every disgust_scores row and attach the matching users columns,
# matched on user_id.
study1 <- disgust_scores %>%
  left_join(users, by = "user_id")
study1 %>% glimpse()
## Rows: 20,000
## Columns: 8
## $ id <dbl> 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, …
## $ user_id <dbl> 1, 155324, 155366, 155370, 155386, 155409, 155427, 155425, 155429, 155450, 155451, 1…
## $ date <date> 2008-07-10, 2008-07-11, 2008-07-12, 2008-07-12, 2008-07-12, 2008-07-12, 2008-07-13,…
## $ moral <dbl> 1.428571, 3.000000, 5.571429, 5.714286, 1.428571, 4.142857, 3.285714, 4.428571, 3.71…
## $ pathogen <dbl> 2.714286, 2.571429, 4.000000, 4.857143, 3.857143, 4.142857, 5.285714, 3.857143, 5.42…
## $ sexual <dbl> 1.7142857, 1.8571429, 0.4285714, 4.7142857, 3.7142857, 1.5714286, 3.0000000, 2.42857…
## $ sex <chr> "female", "female", "male", "female", "male", "male", "female", "female", "female", …
## $ birthyear <dbl> 1976, 1984, 1982, 1968, 1983, 1983, 1987, 1978, 1986, 1970, 1986, 1985, 1966, 1984, …
Add the `users` data to the `disgust_scores` data, but have the columns
from the participant (`users`) table first.
# Start from users so its columns come first; the right join keeps every
# row of disgust_scores (the right-hand table), matched on user_id.
study2 <- users %>%
  right_join(disgust_scores, by = "user_id")
study2 %>% glimpse()
## Rows: 20,000
## Columns: 8
## $ user_id <dbl> 0, 1, 2, 2118, 2311, 3630, 4458, 4651, 4976, 5469, 6066, 6093, 6172, 7129, 7646, 837…
## $ sex <chr> NA, "female", "male", "female", "male", "male", "female", "female", "male", "female"…
## $ birthyear <dbl> NA, 1976, 1985, 1985, 1982, 1968, 1933, 1979, 1981, 1988, 1977, 1965, 1987, 1988, 19…
## $ id <dbl> 1199, 1, 1599, 13332, 23, 1160, 7980, 552, 37829, 6902, 6158, 4850, 1134, 6362, 2248…
## $ date <date> 2008-10-07, 2008-07-10, 2008-10-27, 2012-01-02, 2008-07-15, 2008-10-06, 2011-09-05,…
## $ moral <dbl> 5.2857143, 1.4285714, NA, 1.0000000, 4.0000000, NA, 3.4285714, 3.8571429, 3.4285714,…
## $ pathogen <dbl> 4.714286, 2.714286, NA, 5.000000, 4.285714, 2.142857, 3.571429, 4.857143, 4.285714, …
## $ sexual <dbl> 2.1428571, 1.7142857, NA, 3.0000000, 1.8571429, 1.1428571, 3.0000000, 4.2857143, 0.0…
Create a table with only the `disgust_scores` and `personality_scores`
data from the same `user_id` collected on the same `date`.
# Keep only rows where both questionnaires share the same user_id AND date.
study3 <- disgust_scores %>%
  inner_join(personality_scores, by = c("user_id", "date"))
study3 %>% glimpse()
## Rows: 555
## Columns: 11
## $ id <dbl> 3, 6, 17, 18, 21, 22, 24, 25, 32, 33, 34, 37, 39, 43, 44, 46, 47, 49, 51, 54, 55, 56,…
## $ user_id <dbl> 155324, 155386, 155567, 155571, 155665, 155682, 155712, 155764, 155864, 155880, 15588…
## $ date <date> 2008-07-11, 2008-07-12, 2008-07-14, 2008-07-14, 2008-07-15, 2008-07-15, 2008-07-15, …
## $ moral <dbl> 3.000000, 1.428571, 5.571429, 2.714286, 4.142857, 2.714286, 4.428571, 2.571429, 5.571…
## $ pathogen <dbl> 2.571429, 3.857143, 4.714286, 6.000000, 4.142857, 3.000000, 4.000000, 4.571429, 4.571…
## $ sexual <dbl> 1.8571429, 3.7142857, 2.5714286, 4.4285714, 3.4285714, 0.7142857, 1.1428571, 4.285714…
## $ Ag <dbl> 4.000000, 3.142857, 5.285714, 3.714286, 2.857143, 3.428571, 3.571429, 2.142857, 4.285…
## $ Co <dbl> 3.300000, 2.600000, 5.700000, 3.800000, 1.800000, 3.000000, 4.000000, 4.600000, 3.200…
## $ Ex <dbl> 4.8888889, 4.0000000, 3.8888889, 4.5555556, 4.6666667, 3.5555556, 2.7777778, 3.666666…
## $ Ne <dbl> 2.375000, 0.250000, 1.125000, 2.250000, 3.125000, 1.375000, 3.375000, 3.000000, 4.375…
## $ Op <dbl> 4.714286, 5.142857, 3.142857, 2.857143, 4.571429, 4.857143, 5.285714, 3.857143, 5.142…
Join data from the same `user_id`, regardless of `date`. Does this give
you the same data table as above?
# Match on user_id only. Both tables have a date column that is no longer a
# join key, so the result carries both, as date.x and date.y.
study3_nodate <- disgust_scores %>%
  inner_join(personality_scores, by = c("user_id"))
study3_nodate %>% glimpse()
## Rows: 677
## Columns: 12
## $ id <dbl> 1, 3, 6, 17, 18, 20, 21, 22, 24, 25, 32, 33, 34, 35, 36, 37, 39, 41, 43, 44, 46, 47, …
## $ user_id <dbl> 1, 155324, 155386, 155567, 155571, 124756, 155665, 155682, 155712, 155764, 155864, 15…
## $ date.x <date> 2008-07-10, 2008-07-11, 2008-07-12, 2008-07-14, 2008-07-14, 2008-07-14, 2008-07-15, …
## $ moral <dbl> 1.428571, 3.000000, 1.428571, 5.571429, 2.714286, 5.428571, 4.142857, 2.714286, 4.428…
## $ pathogen <dbl> 2.714286, 2.571429, 3.857143, 4.714286, 6.000000, 5.142857, 4.142857, 3.000000, 4.000…
## $ sexual <dbl> 1.7142857, 1.8571429, 3.7142857, 2.5714286, 4.4285714, 2.7142857, 3.4285714, 0.714285…
## $ date.y <date> 2006-02-08, 2008-07-11, 2008-07-12, 2008-07-14, 2008-07-14, 2008-01-23, 2008-07-15, …
## $ Ag <dbl> 2.571429, 4.000000, 3.142857, 5.285714, 3.714286, 4.857143, 2.857143, 3.428571, 3.571…
## $ Co <dbl> 3.000000, 3.300000, 2.600000, 5.700000, 3.800000, 3.800000, 1.800000, 3.000000, 4.000…
## $ Ex <dbl> 2.6666667, 4.8888889, 4.0000000, 3.8888889, 4.5555556, 2.1111111, 4.6666667, 3.555555…
## $ Ne <dbl> 2.250000, 2.375000, 0.250000, 1.125000, 2.250000, 3.375000, 3.125000, 1.375000, 3.375…
## $ Op <dbl> 4.285714, 4.714286, 5.142857, 3.142857, 2.857143, 5.285714, 4.571429, 4.857143, 5.285…
Create a table of the `disgust_scores` and `personality_scores` data
containing all of the data from both tables.
# Keep all rows from both questionnaires, matched on user_id and date;
# columns from the table without a match are filled with NA.
study4 <- disgust_scores %>%
  full_join(personality_scores, by = c("user_id", "date"))
study4 %>% glimpse()
## Rows: 34,445
## Columns: 11
## $ id <dbl> 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 2…
## $ user_id <dbl> 1, 155324, 155366, 155370, 155386, 155409, 155427, 155425, 155429, 155450, 155451, 15…
## $ date <date> 2008-07-10, 2008-07-11, 2008-07-12, 2008-07-12, 2008-07-12, 2008-07-12, 2008-07-13, …
## $ moral <dbl> 1.428571, 3.000000, 5.571429, 5.714286, 1.428571, 4.142857, 3.285714, 4.428571, 3.714…
## $ pathogen <dbl> 2.714286, 2.571429, 4.000000, 4.857143, 3.857143, 4.142857, 5.285714, 3.857143, 5.428…
## $ sexual <dbl> 1.7142857, 1.8571429, 0.4285714, 4.7142857, 3.7142857, 1.5714286, 3.0000000, 2.428571…
## $ Ag <dbl> NA, 4.000000, NA, NA, 3.142857, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5.285714, 3.7…
## $ Co <dbl> NA, 3.3, NA, NA, 2.6, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5.7, 3.8, NA, NA, 1.8, …
## $ Ex <dbl> NA, 4.888889, NA, NA, 4.000000, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3.888889, 4.5…
## $ Ne <dbl> NA, 2.375, NA, NA, 0.250, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1.125, 2.250, NA, N…
## $ Op <dbl> NA, 4.714286, NA, NA, 5.142857, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3.142857, 2.8…
Create a table of just the data from the `disgust_scores` table for users
who completed the `personality_scores` questionnaire that same day.
# Filter disgust_scores to rows that have a personality_scores entry with the
# same user_id and date; no personality_scores columns are added.
study5 <- disgust_scores %>%
  semi_join(personality_scores, by = c("user_id", "date"))
study5 %>% glimpse()
## Rows: 555
## Columns: 6
## $ id <dbl> 3, 6, 17, 18, 21, 22, 24, 25, 32, 33, 34, 37, 39, 43, 44, 46, 47, 49, 51, 54, 55, 56,…
## $ user_id <dbl> 155324, 155386, 155567, 155571, 155665, 155682, 155712, 155764, 155864, 155880, 15588…
## $ date <date> 2008-07-11, 2008-07-12, 2008-07-14, 2008-07-14, 2008-07-15, 2008-07-15, 2008-07-15, …
## $ moral <dbl> 3.000000, 1.428571, 5.571429, 2.714286, 4.142857, 2.714286, 4.428571, 2.571429, 5.571…
## $ pathogen <dbl> 2.571429, 3.857143, 4.714286, 6.000000, 4.142857, 3.000000, 4.000000, 4.571429, 4.571…
## $ sexual <dbl> 1.8571429, 3.7142857, 2.5714286, 4.4285714, 3.4285714, 0.7142857, 1.1428571, 4.285714…
Create a table of data from `users` who completed neither the
`personality_scores` questionnaire nor the `disgust_scores` questionnaire.
(Hint: this will require two steps.)
# Step 1: drop users whose user_id appears in personality_scores.
no_personality <- users %>%
  anti_join(personality_scores, by = "user_id")
# Step 2: of those, drop users whose user_id appears in disgust_scores,
# leaving users with no rows in either questionnaire table.
study6 <- no_personality %>%
  anti_join(disgust_scores, by = "user_id")
study6 %>% glimpse()
## Rows: 17,728
## Columns: 3
## $ user_id <dbl> 9, 10, 17, 19, 20, 21, 22, 23, 24, 27, 30, 31, 32, 33, 34, 35, 37, 47, 51, 67, 78, 8…
## $ sex <chr> "male", "female", "female", "female", "male", "male", "male", "female", "female", "f…
## $ birthyear <dbl> 1972, 1978, 1981, 1980, 1964, 1945, 1973, 1985, 1982, 1965, 1985, 1974, 1984, 1954, …
Load new user data from `users2`. Bind this table and the original `users`
table into a single table called `users_all`.
# Load the second user table, then stack its rows beneath those of users.
# bind_rows() does not remove duplicates, so rows present in both tables
# appear twice in users_all.
data(list = "users2")
users_all <- users %>%
  bind_rows(users2)
users_all %>% glimpse()
## Rows: 112,043
## Columns: 3
## $ user_id <dbl> 0, 1, 2, 5, 8, 9, 10, 17, 19, 20, 21, 22, 23, 24, 27, 30, 31, 32, 33, 34, 35, 37, 47…
## $ sex <chr> NA, "female", "male", "male", "male", "male", "female", "female", "female", "male", …
## $ birthyear <dbl> NA, 1976, 1985, 1980, 1968, 1972, 1978, 1981, 1980, 1964, 1945, 1973, 1985, 1982, 19…
How many users are in both the first and second user table? Use code
to get this number; don’t read the row number from the environment and
type it in. (Hint: What does `nrow(mtcars)` return?)
# Rows present in both user tables. The set operation compares whole rows
# (every column), not just user_id.
b_table <- users %>% dplyr::intersect(users2)
# Count the shared rows in code rather than reading the number off by eye.
both_n <- b_table %>% nrow()
both_n %>% print()
## [1] 11602
How many unique users are there in total across the first and second user tables?
# All distinct rows across the two user tables; union() drops duplicates,
# so rows found in both tables are counted once.
uu_table <- users %>% dplyr::union(users2)
unique_users <- uu_table %>% nrow()
unique_users %>% print()
## [1] 100441
How many users are in the first, but not the second, user table?
# Rows of users that have no identical row in users2. Argument order
# matters: the first table is the one being filtered.
fu_table <- users %>% dplyr::setdiff(users2)
first_users <- fu_table %>% nrow()
first_users %>% print()
## [1] 40441
How many users are in the second, but not the first, user table?
# Rows of users2 that have no identical row in users (the reverse of the
# previous set difference).
su_table <- users2 %>% dplyr::setdiff(users)
second_users <- su_table %>% nrow()
second_users %>% print()
## [1] 48398