#' ---
#' title: "Subsetting the raw data to create subsets needed for models 1A, 1B, 2 and 3"
#' author: "Rowan Titchener"
#' date: "2021"
#' output: html_document
#' ---

# The YAML header above is written as `#'` comment lines so that this file
# parses as plain R (a bare `---` / `title:` header is a syntax error in a
# script).
#
# NOTE: the script no longer starts with rm(list = ls()). Run it in a fresh R
# session instead; every object used below is created by the script itself.

# Set working directory ----
# All file names below are relative to the folder holding the raw data.
#setwd("/...")
#setwd("/Users/rowantitchener/Documents/00_PhD/Study1_social_disappointment/15_Submissions/10_Royal Society Open Science (acceptance)/04_Upload-to-figshare/0_raw-data")

# "Pre-R" data handling ----
# 1. The videos were coded using the Mangold Interact software. Mangold
#    Interact creates a .act file. This .act file was exported to Excel (.xlsx).
#    Prior to export from Mangold Interact, RT manually checked that all 'y'
#    entries in the discount_trial column were justified as per the following
#    5 criteria:
#      S_platform_travels_empty
#      S_platform_travels_with_more_than_1_item
#      P_platform_travels_empty
#      P_platform_travels_with_more_than_1_item
#      S_accidentally_drops_food
#    For detail on discounts refer to section 3 of the electronic
#    supplementary materials.
# 2. Within the Excel spreadsheet, dates were printed in ISO 8601 format
#    (yyyy-mm-dd).
# All subsequent data handling took place in R.

# Libraries ----
# library(knitr)
library(tidyverse)
library(dplyr)
library(stringr)
library(forcats)

# Import data ----
xdata <- read_csv(
  "2021-06-02_social-disappointment_data.csv",
  col_names = TRUE
)

# Tidy ----
xdata <- xdata %>%
  # Remove the rows comprised solely of NAs (these rows are an artefact of the
  # Mangold Interact export process); they are identified by a missing date.
  drop_na(date) %>%
  # Rename the Duration_Time column.
  rename(pull_latency = Duration_Time) %>%
  # Add a column which documents the proportion of refusals per session.
  # Columns are referenced by bare name inside mutate() rather than via
  # `xdata$`, so the calculation always uses the data being piped in.
  mutate(proportion = number_refusals_in_session / number_trials_in_session)

# Re-classify columns as factors ----
# Any value that is not listed in the `levels` vector becomes NA.
levels_subject <- c(
  "lukas", "mara", "mars", "max", "meiwi", "mila",
  "mona", "moritz", "sambia", "sherry", "simon", "smilla"
)
xdata$subject <- factor(xdata$subject, levels = levels_subject)

levels_sex <- c("female", "male")
xdata$sex <- factor(xdata$sex, levels = levels_sex)
# Re-classify the remaining design columns as factors.
# Any value that is not listed in the `levels` vector becomes NA.

# condition: stored as "inequality"/"equality" in the raw data, relabelled to
# "test"/"control" for the models.
levels_condition <- c("inequality", "equality")
xdata$condition <- factor(xdata$condition, levels = levels_condition)
xdata$condition <- fct_recode(
  xdata$condition,
  "test" = "inequality",
  "control" = "equality"
)

levels_distributor <- c("human", "machine")
xdata$distributor <- factor(xdata$distributor, levels = levels_distributor)

levels_partner_presence <- c("n", "y")
xdata$partner_presence <- factor(
  xdata$partner_presence,
  levels = levels_partner_presence
)

levels_partner_identity <- c("ghost", "ilja", "linus")
xdata$partner_identity <- factor(
  xdata$partner_identity,
  levels = levels_partner_identity
)

levels_counterbalance <- c("partner", "no_partner")
xdata$counterbalance <- factor(
  xdata$counterbalance,
  levels = levels_counterbalance
)

levels_refusal <- c("1", "0")
xdata$refusal <- factor(xdata$refusal, levels = levels_refusal)

levels_discount_trial <- c("n", "y")
xdata$discount_trial <- factor(
  xdata$discount_trial,
  levels = levels_discount_trial
)

# Create data subsets for each model (Models 1A, 1B, 2 & 3) ----

# Subset 1 - saved as a .csv file titled model1a.csv
# Subset 1 contains:
#   - all test condition trials
#   - all non-discounted trials
#   - all food refusal/food acceptance behaviours

# Behaviour codes that count as a food refusal or a food acceptance.
model1a_behaviors <- c(
  "S_eats_food",
  "S_refuses_to_consume_food",
  "S_refuses_to_participate",
  "S_refuses_to_take_food"
)

model1a_subset <- xdata %>%
  filter(
    condition == "test",
    discount_trial == "n",
    behavior %in% model1a_behaviors
  ) %>%
  select(
    date, condition, refusal, distributor, partner_presence,
    session, trial, sex, counterbalance, subject
  )

# Number of rows in the subset; 1066 was the count recorded by the author.
nrow(model1a_subset) # 1066
write_excel_csv(model1a_subset, "model1a.csv")

# Subset 2 - saved as a .csv file titled model1b.csv
# Subset 2 is identical to Subset 1 except that it only contains the trials
# that were carried out while ilja was in the partner role.
# Behaviour codes that count as a food refusal or a food acceptance.
model1b_behaviors <- c(
  "S_eats_food",
  "S_refuses_to_consume_food",
  "S_refuses_to_participate",
  "S_refuses_to_take_food"
)

# Build Subset 2: test-condition, non-discounted refusal/acceptance trials,
# restricted to a hard-coded list of sessions per subject.
# In R `&` binds more tightly than `|`, so each `subject & session` pair is
# one alternative; the parentheses below only make that grouping explicit.
# "mars" and "sambia" carry no session restriction, i.e. all of their sessions
# are kept (presumably ilja was their partner throughout - confirm against the
# session records).
model1b_subset <- xdata %>%
  filter(
    condition == "test",
    discount_trial == "n",
    behavior %in% model1b_behaviors,
    subject == "mars" |
      (subject == "moritz" &
        session %in% c("1", "2", "3", "4", "5", "6", "7")) |
      (subject == "sherry" & session %in% c("1", "2", "3", "4")) |
      (subject == "mona" & session %in% c("1", "2", "3")) |
      (subject == "mila" & session %in% c("1", "2", "3", "4")) |
      subject == "sambia" |
      (subject == "lukas" & session %in% c("1", "2", "3", "4")) |
      (subject == "meiwi" & session %in% c("1", "2", "3", "4", "5")) |
      (subject == "mara" & session %in% c("1", "2")) |
      (subject == "smilla" & session %in% c("1", "2", "3", "4")) |
      (subject == "max" & session %in% c("1", "2")) |
      (subject == "simon" & session %in% c("1", "2", "3", "4"))
  ) %>%
  select(
    date, condition, refusal, distributor, partner_presence,
    session, trial, sex, counterbalance, subject
  )

# Number of rows in the subset; 597 was the count recorded by the author.
nrow(model1b_subset) # 597
write_excel_csv(model1b_subset, "model1b.csv")

# Subset 3 - saved as a .csv file titled model2.csv
# Subset 3 contains:
#   - all equality ("control") condition trials
#   - all non-discounted trials
#   - all food refusal/food acceptance behaviours
model2_subset <- xdata %>%
  filter(
    condition == "control",
    discount_trial == "n",
    behavior %in% model1b_behaviors
  ) %>%
  select(
    date, condition, refusal, distributor,
    session, trial, sex, subject
  )

# Number of rows in the subset; 437 was the count recorded by the author.
nrow(model2_subset) # 437
write_excel_csv(model2_subset, "model2.csv")

# Subset 4 - saved as a .csv file titled model3.csv
# Subset 4 contains:
#   - all inequality ("test") condition trials
#   - all non-discounted trials
#   - all pull latencies ("S_latency_to_pull" behaviour)
#   - all time-out latencies ("S_timeout_latency" behaviour)
model3_subset <- xdata %>%
  filter(
    condition == "test",
    discount_trial == "n",
    behavior %in% c("S_latency_to_pull", "S_timeout_latency")
  ) %>%
  select(
    date, condition, pull_latency, distributor, partner_presence,
    session, trial, sex, counterbalance, behavior, subject
  )

# Number of rows in the subset; 1066 was the count recorded by the author.
nrow(model3_subset) # 1066
write_excel_csv(model3_subset, "model3.csv")

# Citation information ----
# Record the R version, attached packages and how to cite R.
sessionInfo()
R.Version()
citation()