This tutorial uses data downloaded from IPUMS’ ATUS-X extract builder. The data includes samples from 2003 - 2017.
Type | Variable | Label |
---|---|---|
H | RECTYPE | Record Type |
H | YEAR | Survey year |
H | CASEID | ATUS Case ID |
H | REGION | Region |
H | HH_SIZE | Number of people in household |
H | HH_CHILD | Children under 18 in household |
H | HH_NUMKIDS | Number of children under 18 in household |
H | AGEYCHILD | Age of youngest household child |
H | HH_NUMADULTS | Number of adults in household |
H | HHTENURE | Living quarters owned, rented, or occupied without rent |
P | PERNUM | Person number (general) |
P | LINENO | Person line number |
P | DAY | ATUS interview day of the week |
P | WT06 | Person weight, 2006 methodology |
P | AGE | Age |
P | SEX | Sex |
P | RACE | Race |
P | HISPAN | Hispanic origin |
P | MARST | Marital status |
P | EDUC | Highest level of school completed |
P | FULLPART | Full time/part time employment status |
P | SPOUSEPRES | Spouse or unmarried partner in household |
P | SPSEX | Sex of respondent’s spouse or unmarried partner |
P | HH_NUMOWNKIDS | Number of own children under 18 in household |
P | KIDUND13 | Own child under 13 in household |
A | ACTLINE | Activity line number |
A | ACTIVITY | Activity |
A | DURATION | Duration of activity |
Additionally, two time use variables were constructed in ATUSX:
Extract Notes:
IPUMS NOTE: To load data, you must download both the extract’s data and the DDI.
Download the .DAT file after ATUSX processes your request (you will get an email).
To download the DDI file, right click on the DDI link under the CODEBOOK heading, and select “save link file as”
Detailed instructions for importing the data can be found at: https://cran.r-project.org/web/packages/ipumsr/vignettes/ipums.html.
This R code was executed in the open source edition of RStudio.
You will need to change your file path to the local folder where you saved the ATUS data extracts.
Notice the “/” lean to the right and not the left “" if you are using a Windows computer.
setwd("C:/Users/Joanna/Dropbox/MaritalStatus")
library(ipumsr)
library(tidyverse)
Libraries can be installed with the commands install.packages("ipumsr")
and install.packages("tidyverse")
. These only need to be installed once but users must load the libraried each time Rstudio is started.
Users should change the "atus_00037.xml"
to the file name of their own ATUSX extract.
ddi <- read_ipums_ddi("atus_00037.xml")
data <- read_ipums_micro(ddi)
## Use of data from ATUS-X is subject to conditions including that users should
## cite the data appropriately. Use command `ipums_conditions()` for more details.
# Make sure data is now a dataframe.
class(data)
#Look at the structure of the data
summary(data)
# Check-out the variable names
names(data)
# Make the variable names lowercase
data <- data %>% rename_all(tolower)
data <- data %>%
mutate( year = as.integer(lbl_clean(year)),
caseid = as.character(lbl_clean(caseid)),
hh_size = as.integer(lbl_clean(hh_size)),
hh_child = as.integer(lbl_clean(hh_child)),
hh_numkids = as.integer(lbl_clean(hh_numkids)),
ageychild = as.integer(lbl_clean(ageychild)),
hh_numadults = as.integer(lbl_clean(hh_numadults)),
hhtenure = as_factor(lbl_clean(hhtenure)),
day = as_factor(lbl_clean(day)),
wt06 = as.integer(lbl_clean(wt06)),
age = as.integer(lbl_clean(age)),
sex = as_factor(lbl_clean(sex)),
race = as_factor(lbl_clean(race)),
hispan = as_factor(lbl_clean(hispan)),
marst = as_factor(lbl_clean(marst)),
educ = as.integer(lbl_clean(educ)),
fullpart = as.character(lbl_clean(fullpart)),
spousepres = as_factor(lbl_clean(spousepres)),
spsex = as_factor(lbl_clean(spsex)),
hh_numownkids = as.integer(lbl_clean(hh_numownkids)),
kidund13 = as_factor(lbl_clean(kidund13)),
activity = as.character(lbl_clean(activity)),
region = as_factor(lbl_clean(region)),
telesolo = as.integer(lbl_clean(telesolo)),
telesp = as.integer(lbl_clean(telesp)))
# Change NA to 0 for duration & activity minutes
data[["duration"]][is.na(data[["duration"]])] <- 0
summary(data$duration)
data[["activity"]][is.na(data[["activity"]])] <- "0"
summary(data$activity)
# Check that duration = 1440
data %>%
group_by(caseid) %>%
summarise(total= sum(duration))
## # A tibble: 191,558 x 2
## caseid total
## <chr> <dbl>
## 1 20030100013280 1440
## 2 20030100013344 1440
## 3 20030100013352 1440
## 4 20030100013848 1440
## 5 20030100014165 1440
## 6 20030100014169 1440
## 7 20030100014209 1440
## 8 20030100014427 1440
## 9 20030100014550 1440
## 10 20030100014758 1440
## # ... with 191,548 more rows
This creates a logical object in RStudio for each activity category.
The activity codes can be found in ATUS’Activity Coding Lexicon.
NOTE: The figure published in PAA Affairs used a more conservative measure of housework activities. This code includes all housework activities.
# Childcare
ccare <- data$activity %in%
c(030100:030400, 080100:080200, 180381)
# Housework
hswrk <- data$activity %in%
c(020101:030000, 080200:080300, 080700:080800, 090100:100000, 160106, 070101, 180701, 180904, 180807, 180903, 080699, 160106)
# Leisure
leisure <- data$activity %in%
c(120100:130000, 130100:140000, 160101, 160102, 181200:181400)
# Sleep
sleep <- data$activity %in%
c(010100:020000)
#Leisure sub-types
socl <- data$activity %in%
c(120100:120200, 120200:120300, 120400:120500, 120501, 120502, 120504, 120599, 130200:130300, 130302, 130402)
actl <- data$activity %in%
c(120307, 120309:120313, 130100:130200, 130301, 130401, 130499)
pass <- data$activity %in%
c(120301:120306, 120308, 120399, 120503)
# Television
tele <- data$activity %in%
c(120303, 120304)
Next, we use those logical objects to create the activity category variables in our dataset. First, we create a new variable called “actcat”, with missing values. This variable will identify the activity categories, using the logical objects we created.
data$actcat<-NA
data$actcat[ccare] <- "child care"
data$actcat[hswrk] <- "housework"
data$actcat[leisure] <- "leisure"
data$actcat[sleep] <- "sleep"
data$actcat <- as.character(data$actcat)
We’ll also create a leisure category variable and one for television. Sub-categories of activties need their own variables. Here, television is a subcategory of passive leisure, which is a subcategory of leisure. Thus, we have more refined identification separated into different variables.
data$leiscat<-NA
data$leiscat[socl] <- "social leisure"
data$leiscat[actl] <- "active leisure"
data$leiscat[pass] <- "passive leisure"
data$leiscat <- as.character(data$leiscat)
data$telecat<-NA
data$telecat[tele] <- "television"
data$telecat <- as.character(data$telecat)
caseid | activity | duration | actcat | leiscat | telecat |
---|---|---|---|---|---|
20030100013280 | 0 | 0 | NA | NA | NA |
20030100013280 | 0 | 0 | NA | NA | NA |
20030100013280 | 130124 | 60 | leisure | active leisure | NA |
20030100013280 | 0 | 0 | NA | NA | NA |
20030100013280 | 10201 | 30 | sleep | NA | NA |
20030100013280 | 0 | 0 | NA | NA | NA |
20030100013280 | 10101 | 600 | sleep | NA | NA |
20030100013280 | 0 | 0 | NA | NA | NA |
20030100013280 | 120303 | 150 | leisure | passive leisure | television |
20030100013280 | 0 | 0 | NA | NA | NA |
20030100013280 | 110101 | 5 | NA | NA | NA |
20030100013280 | 0 | 0 | NA | NA | NA |
20030100013280 | 120303 | 175 | leisure | passive leisure | television |
20030100013280 | 0 | 0 | NA | NA | NA |
20030100013280 | 10101 | 270 | sleep | NA | NA |
20030100013280 | 0 | 0 | NA | NA | NA |
20030100013280 | 10201 | 10 | sleep | NA | NA |
20030100013280 | 0 | 0 | NA | NA | NA |
20030100013280 | 130124 | 140 | leisure | active leisure | NA |
20030100013280 | 0 | 0 | NA | NA | NA |
Next we need to create summary variables with the total duration of each activty for each person
# Master activty variables
data <- data %>%
group_by(caseid) %>%
summarise (leisure = sum(duration[actcat == "leisure"], na.rm=TRUE),
sleep = sum(duration[actcat == "sleep"], na.rm=TRUE),
hswrk = sum(duration[actcat == "housework"], na.rm=TRUE),
ccare = sum(duration[actcat == "child care"], na.rm=TRUE)) %>%
inner_join(data, by='caseid')
# Leisure activity variables
data <- data %>%
group_by(caseid) %>%
summarise (socl = sum(duration[leiscat == "social leisure"], na.rm=TRUE),
actl = sum(duration[leiscat == "active leisure"], na.rm=TRUE),
pass = sum(duration[leiscat == "passive leisure"], na.rm=TRUE)) %>%
inner_join(data, by='caseid')
# Television variables
data <- data %>%
group_by(caseid) %>%
summarise (tele = sum(duration[telecat == "television"], na.rm=TRUE)) %>%
inner_join(data, by='caseid')
caseid | leisure | sleep | hswrk | ccare | socl | actl | pass | tele |
---|---|---|---|---|---|---|---|---|
20030100013280 | 525 | 910 | 0 | 0 | 0 | 200 | 325 | 325 |
20030100013280 | 525 | 910 | 0 | 0 | 0 | 200 | 325 | 325 |
20030100013280 | 525 | 910 | 0 | 0 | 0 | 200 | 325 | 325 |
20030100013280 | 525 | 910 | 0 | 0 | 0 | 200 | 325 | 325 |
20030100013280 | 525 | 910 | 0 | 0 | 0 | 200 | 325 | 325 |
Currently, the data is in long format, with each person represented in multiple rows in the dataset. We will aggregate the data so each person is represented once per row.
caseid | pernum | lineno | sex | actline | activity | sleep |
---|---|---|---|---|---|---|
20030100013280 | NA | NA | NA | NA | 0 | 910 |
20030100013280 | 1 | 1 | Male | NA | 0 | 910 |
20030100013280 | NA | NA | NA | 1 | 130124 | 910 |
20030100013280 | NA | NA | NA | NA | 0 | 910 |
20030100013280 | NA | NA | NA | 2 | 10201 | 910 |
20030100013280 | NA | NA | NA | NA | 0 | 910 |
20030100013280 | NA | NA | NA | 3 | 10101 | 910 |
20030100013280 | NA | NA | NA | NA | 0 | 910 |
20030100013280 | NA | NA | NA | 4 | 120303 | 910 |
20030100013280 | NA | NA | NA | NA | 0 | 910 |
Create a summary dataset for the variables that repeat each row, per person
max <- data %>%
group_by(caseid) %>%
summarise_at(vars(tele, socl, actl, pass, ccare, hswrk, leisure, sleep, year), funs(max))
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## please use list() instead
##
## # Before:
## funs(name = f(.)
##
## # After:
## list(name = ~f(.))
## This warning is displayed once per session.
Notice that each person is now represented only once per row.
## # A tibble: 5 x 9
## caseid tele socl actl pass ccare hswrk leisure sleep
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 20030100013280 325 0 200 325 0 0 525 910
## 2 20030100013344 60 530 0 60 60 0 610 680
## 3 20030100013352 120 220 0 120 0 325 340 640
## 4 20030100013848 265 0 0 265 5 270 265 755
## 5 20030100014165 60 0 60 60 120 70 130 460
Create a summary dataset for the variables that appear in the rectype = 1 rows, by person
rec1 <- data %>%
filter(rectype == 1) %>%
select(caseid, hh_size, hh_child, hh_numkids, ageychild, hh_numadults, hhtenure, region)
## # A tibble: 5 x 8
## caseid hh_size hh_child hh_numkids ageychild hh_numadults hhtenure region
## <chr> <int> <int> <int> <int> <int> <fct> <fct>
## 1 20030~ 3 0 0 999 3 Owned o~ West
## 2 20030~ 4 1 2 0 2 Owned o~ West
## 3 20030~ 2 0 0 999 2 Owned o~ West
## 4 20030~ 4 1 2 9 2 Owned o~ South
## 5 20030~ 4 1 2 14 2 Owned o~ South
Create a summary dataset for the variables that appear in the rectype = 2 rows, by person
rec2 <- data %>%
filter(rectype == 2) %>%
select(caseid, age, sex, race, hispan, marst, educ, fullpart, spousepres, spsex,
hh_numownkids, kidund13, day, wt06, telesolo, telesp)
## # A tibble: 5 x 16
## caseid age sex race hispan marst educ fullpart spousepres spsex
## <chr> <int> <fct> <fct> <fct> <fct> <int> <chr> <fct> <fct>
## 1 20030~ 60 Male Blac~ Not H~ Marr~ 41 2 Spouse pr~ Fema~
## 2 20030~ 41 Fema~ Whit~ Not H~ Marr~ 30 2 Spouse pr~ Male
## 3 20030~ 26 Fema~ Whit~ Not H~ Marr~ 31 2 Spouse pr~ Male
## 4 20030~ 36 Fema~ Blac~ Not H~ Marr~ 21 99 Spouse pr~ Male
## 5 20030~ 51 Male Whit~ Not H~ Marr~ 42 1 Spouse pr~ Fema~
## # ... with 6 more variables: hh_numownkids <int>, kidund13 <fct>,
## # day <fct>, wt06 <int>, telesolo <int>, telesp <int>
Put them all together.
atus <- reduce(list(max, rec1, rec2),
left_join, by = "caseid")
Take a look at the person-level file with activity duration summary variables
## # A tibble: 5 x 32
## caseid tele socl actl pass ccare hswrk leisure sleep year hh_size
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
## 1 20030~ 325 0 200 325 0 0 525 910 2003 3
## 2 20030~ 60 530 0 60 60 0 610 680 2003 4
## 3 20030~ 120 220 0 120 0 325 340 640 2003 2
## 4 20030~ 265 0 0 265 5 270 265 755 2003 4
## 5 20030~ 60 0 60 60 120 70 130 460 2003 4
## # ... with 21 more variables: hh_child <int>, hh_numkids <int>,
## # ageychild <int>, hh_numadults <int>, hhtenure <fct>, region <fct>,
## # age <int>, sex <fct>, race <fct>, hispan <fct>, marst <fct>,
## # educ <int>, fullpart <chr>, spousepres <fct>, spsex <fct>,
## # hh_numownkids <int>, kidund13 <fct>, day <fct>, wt06 <int>,
## # telesolo <int>, telesp <int>
# Gender
levels(atus$sex) <- c('Men', 'Women')
# Age
summary(atus$age)
# Marital status
# cohabitors in one group regardless of marital history
# use spousepres variable as indicator of marital status at time of ATUS diary
atus <- atus %>%
mutate(
mar = case_when(
spousepres == "Spouse present" ~ "Married",
spousepres == "Unmarried partner present" ~ "Cohabiting",
marst == "Never married" & spousepres == "No spouse or unmarried partner present" ~ "Single",
marst != "Widowed" & marst != "Never married" &
spousepres == "No spouse or unmarried partner present" ~ "Divorced/Separated",
TRUE ~ NA_character_
))
atus$mar <- as_factor(atus$mar, levels = c("Married", "Cohabiting", "Single", "Divorced/Separated", ordered = TRUE))
## Warning: Some components of ... were not used: levels
# Spouse/partner sex
atus <- atus %>%
mutate(
spsex = case_when(
spsex == "Male" ~ "Male",
spsex == "Female" ~ "Female",
spsex == "NIU (Not in universe)" ~ "NIU",
TRUE ~ NA_character_
))
atus$spsex <- as_factor(atus$spsex, levels = c("Male", "Female", "NIU"))
## Warning: Some components of ... were not used: levels
# Extended Family Member
atus <- atus %>%
mutate(
exfam = case_when(
((spousepres == "Spouse present" | spousepres == "Unmarried partner present") & hh_numadults <= 2) |
((spousepres == "No spouse or unmarried partner present") & hh_numadults <=1) ~ "No extra adults",
((spousepres == "Spouse present" | spousepres == "Unmarried partner present") & hh_numadults >= 3) |
((spousepres == "No spouse or unmarried partner present") & hh_numadults >=2) ~ "Extra adults",
TRUE ~ NA_character_
))
atus$exfam <- as_factor(atus$exfam, levels = c("No extra adults", "Extra adults"))
## Warning: Some components of ... were not used: levels
atus$exfam <- relevel(atus$exfam, ref = "No extra adults")
#Kid under 2
atus <- atus %>%
mutate(
kidu2 = case_when(
ageychild <=2 ~ "Child < 2",
TRUE ~ "No children < 2"
))
atus$kidu2 <- as_factor(atus$kidu2, levels = c("Child < 2", "No children < 2", ref = "No children < 2"))
## Warning: Some components of ... were not used: levels
# Number of own HH kids
summary(atus$hh_numownkids)
# Employment
atus <- atus %>%
mutate(
employ = case_when(
fullpart == "1" ~ "Full-time",
fullpart == "2" ~ "Part-time",
fullpart == "99" ~ "Not employed",
TRUE ~ NA_character_
))
atus$employ <- as_factor(atus$employ, levels = c("Full-time", "Part-time", "Not employed"))
## Warning: Some components of ... were not used: levels
# Education
atus <- atus %>%
mutate(
educ = case_when(
(educ >= 10 & educ <= 17) ~ "Less than high school",
(educ >= 20 & educ <= 21) ~ "High school",
(educ >= 30 & educ <= 31) ~ "Some college",
(educ >= 40 & educ <= 43) ~ "BA or higher",
TRUE ~ NA_character_
))
atus$educ <- as_factor(atus$educ, levels = c("Less than high school", "High school", "Some college", "BA or higher", ref = "High school"))
## Warning: Some components of ... were not used: levels
# Race
atus <- atus %>%
mutate(
raceth = case_when(
race == "White only" & hispan == "Not Hispanic" ~ "White",
race == "Black only" & hispan == "Not Hispanic" ~ "Black",
race == "Asian only" & hispan == "Not Hispanic" ~ "Asian",
hispan != "Not Hispanic" ~ "Hispanic",
TRUE ~ "Other"
))
atus$raceth <- as_factor(atus$raceth, levels = c("White", "Black", "Hispanic", "Asian", "Other"))
## Warning: Some components of ... were not used: levels
atus$raceth <- relevel(atus$raceth, ref = "White")
# Weekend
atus <- atus %>%
mutate(
weekend = case_when(
day == "Sunday" | day == "Saturday" ~ "Weekend",
day == "Monday" | day == "Tuesday" | day == "Wednesday" |
day == "Thursday" | day == "Friday" ~ "Weekday",
TRUE ~ NA_character_
))
atus$weekend <- as_factor(atus$weekend, levels = c("Weekday", "Weekend"))
## Warning: Some components of ... were not used: levels
atus$weekend <- relevel(atus$weekend, ref = "Weekday")
# Region
summary(atus$region)
# Home ownership
atus <- atus %>%
mutate(
ownrent = case_when(
hhtenure == "Owned or being bought by a household member" ~ "Own",
hhtenure == "Rented for cash" ~ "Rent",
hhtenure == "Occupied without payment of cash rent" ~ "Other",
hhtenure == "NIU (Not in universe)" ~ "Other",
TRUE ~ NA_character_
))
atus$ownrent <- as_factor(atus$ownrent, levels = c("Own", "Rent", "Other"))
## Warning: Some components of ... were not used: levels
Limit the dataset to White, Black, and Hispanic mothers of children with kids 13 or younger.
Let’s also restict the dataset to mothers who are prime working age.
atus <- filter(atus, sex == "Women") # women
atus <- filter(atus, spsex == "Male" | spsex == "NIU") # Different sex couples or singles
atus <- filter(atus, hh_numownkids >=1) # mothers
atus <- filter(atus, ageychild <=13) #mothers of kids 13 or younger
atus <- filter(atus, age >= 18 & age <=54) #prime working age
atus <- filter(atus, raceth != "Asian" & raceth != "Other") # white, black, and Hispanic
# Keep only the variables of interest
atus <- select(atus, caseid, wt06, year, ccare, hswrk, leisure, sleep, socl, actl, pass, tele, telesolo, telesp, age, sex, mar, exfam, kidu2, hh_numownkids, employ, educ, raceth, weekend, region, ownrent)
# Listwise deletion
atus <- na.omit(atus)
# If want to look at only recent survey years, remove # below. Don't foget to then update the caption of the figure with the correct survey years.
# atus <- filter(atus, year >= 2012)
Here’s a look at the data
## Classes 'tbl_df', 'tbl' and 'data.frame': 30557 obs. of 25 variables:
## $ caseid : chr "20030100013344" "20030100013848" "20030100014550" "20030100014758" ...
## $ wt06 : int 1735322 6622022 1528296 4277052 505227 1058351 3594555 2574209 2720948 2770442 ...
## $ year : num 2003 2003 2003 2003 2003 ...
## $ ccare : num 60 5 0 80 60 62 105 378 51 60 ...
## $ hswrk : num 0 270 395 170 50 58 390 295 34 65 ...
## $ leisure : num 610 265 120 542 175 113 180 197 131 240 ...
## $ sleep : num 680 755 720 255 615 615 660 510 543 585 ...
## $ socl : num 530 0 0 90 0 0 120 165 0 10 ...
## $ actl : num 0 0 0 150 0 30 60 30 0 10 ...
## $ pass : num 60 265 120 267 175 83 0 0 131 220 ...
## $ tele : num 60 265 120 0 175 83 0 0 111 180 ...
## $ telesolo : int 0 0 0 0 0 30 0 0 98 120 ...
## $ telesp : int 60 265 120 0 0 53 0 0 0 0 ...
## $ age : int 41 36 33 39 33 32 45 39 25 42 ...
## $ sex : Factor w/ 2 levels "Men","Women": 2 2 2 2 2 2 2 2 2 2 ...
## $ mar : Factor w/ 4 levels "Married","Cohabiting",..: 1 1 1 1 3 1 1 4 4 3 ...
## $ exfam : Factor w/ 2 levels "No extra adults",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ kidu2 : Factor w/ 2 levels "No children < 2",..: 2 1 1 1 1 2 1 2 1 1 ...
## $ hh_numownkids: int 2 2 3 4 1 1 2 2 1 2 ...
## $ employ : Factor w/ 3 levels "Part-time","Not employed",..: 1 2 3 3 3 3 3 1 3 3 ...
## $ educ : Factor w/ 4 levels "BA or higher",..: 2 3 2 2 2 1 1 2 3 2 ...
## $ raceth : Factor w/ 5 levels "White","Black",..: 1 2 1 2 1 1 1 2 1 2 ...
## $ weekend : Factor w/ 2 levels "Weekday","Weekend": 2 1 2 1 2 2 1 1 1 1 ...
## $ region : Factor w/ 4 levels "Northeast","Midwest",..: 4 3 2 1 3 1 3 2 2 3 ...
## $ ownrent : Factor w/ 3 levels "Own","Rent","Other": 1 1 1 2 1 1 1 2 2 2 ...
## - attr(*, "na.action")= 'omit' Named int 45 49 63 82 92 131 132 140 154 157 ...
## ..- attr(*, "names")= chr "45" "49" "63" "82" ...
Load the ggeffects
package in order to compute marginal effects from statistical models and save the results as tidy data frames.
library(ggeffects)
For each of activity variables of interest, run a linear model (lm
) and then calculate the marginal effects (ggeffect
)
ggeffect
calculates the adjusted predictions at the means, using proportions to fix factors
#Childcare
lm_care <- lm(ccare ~ mar + exfam + hh_numownkids + kidu2 + educ + employ + raceth + age + weekend + region + ownrent, data = atus, weight=wt06)
pcare <- ggeffect(lm_care, terms = "mar")
#Housework
lm_hswrk <- lm(hswrk ~ mar + exfam + hh_numownkids + kidu2 + educ + employ + raceth + age + weekend + region + ownrent, data = atus, weight=wt06)
phswrk <- ggeffect(lm_hswrk, terms = "mar")
#Leisure
lm_leis <- lm(leisure ~ mar + exfam + hh_numownkids + kidu2 + educ + employ + raceth + age + weekend + region + ownrent, data = atus, weight=wt06)
pleis <- ggeffect(lm_leis, terms = "mar")
#Sleep
lm_sleep <- lm(sleep ~ mar + exfam + hh_numownkids + kidu2 + educ + employ + raceth + age + weekend + region + ownrent, data = atus, weight=wt06)
psleep <- ggeffect(lm_sleep, terms = "mar")
For each of the marginal effects datasets, label the group values as the activity
levels(pcare$group)[levels(pcare$group)=="1"] <- "Childcare"
levels(phswrk$group)[levels(phswrk$group)=="1"] <- "Housework"
levels(pleis$group)[levels(pleis$group)=="1"] <- "Leisure"
levels(psleep$group)[levels(psleep$group)=="1"] <- "Sleep"
Maniuplate the marital status variable
# Change the variable to be a factor variable from numeric
pcare$x <- as.factor(pcare$x)
phswrk$x <- as.factor(phswrk$x)
pleis$x <- as.factor(pleis$x)
psleep$x <- as.factor(psleep$x)
Combine the datatables
pred <- rbind(pcare, phswrk, pleis, psleep)
Prepare the categorical variables for data visualization
# Revalue the marital status factors to be readable
levels(pred$x)[levels(pred$x)=="3"] <- "Married"
levels(pred$x)[levels(pred$x)=="1"] <- "Cohabiting"
levels(pred$x)[levels(pred$x)=="4"] <- "Single"
levels(pred$x)[levels(pred$x)=="2"] <- "Divorced/Separated"
# Order the marital status and activity factors
pred$x <- ordered(pred$x, levels = c("Married", "Cohabiting", "Single", "Divorced/Separated"))
pred$group <- ordered(pred$group, levels = c("Childcare", "Housework", "Leisure", "Sleep"))
Calculate the difference variable (mothers time use compared to married mothers time use, by activity)
pred <- pred %>%
group_by(group) %>%
mutate(diff = predicted - predicted[x == "Married"])
pred %>%
filter(x != "Married") %>%
ggplot(aes(x, diff, fill = x, label = round(diff, 0))) +
geom_col() +
facet_grid(~group) +
ggtitle("Married Mothers Report More Housework and Less Leisure and Sleep Than Other Mothers") +
labs(x = NULL, y = NULL, subtitle = "Predicted minutes per day with model controls",
caption = "Source: American Time Use Surveys (2003 - 2017) \n Models control for extra adults, number of household kids, kids under 2, \n education, employment, race-ethnicity, age, weekend diary day, and region") +
theme_minimal() +
theme(plot.subtitle = element_text(size = 11, vjust = 1),
plot.caption = element_text(vjust = 1),
legend.position="none",
strip.text.x = element_text(size = 16),
axis.title = element_text(size = 14),
axis.text = element_text(size = 12),
plot.title = element_text(size = 16, face = "bold"),
panel.grid.minor = element_blank(),
panel.grid.major.x = element_blank()) +
scale_y_continuous(breaks = c(-30, -20, -10, 0, 10, 20, 30), labels=c("-30", "-20", "-10", "Married mothers' \n minutes per day", "10", "20", "30")) +
scale_fill_manual(values=c("#5D478B", "#CD3278", "#116A66")) +
geom_errorbar(aes(ymin=diff-std.error, ymax=diff+std.error), width=.2,
position=position_dodge(.9), color="grey")