--- title: "multivariate logistic regression" output: html_document --- Objective: Examine the relationship between Biomarker (anti-A09, anti-A15 & anti-C1q), Disease Duration (between Diagnosis and Blood Sampling =DDDXBE)and Disease-Activity ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` ```{r} library(car) ``` ```{r} #setwd("/Volumes/KLIM$/Kleer/Project Paper/Submission/DRYAD") #setwd("Z:/Kleer/Project Paper/Masterfiles/4. Testfiles/") setwd("/Volumes/KLIM$/Kleer/Project Paper/Masterfiles/4. Testfiles") getwd() ``` ```{r, read in data} data <- read.csv2("MASTERFILE_kleer_20210305.csv") ``` ```{r create SLE Subset} #creating SLE subset Group for logistic regression SLEsubset <- data[1:378, 1:256] ``` ```{r Definition of active Disease} SLEsubset$PGA <- factor(SLEsubset$PGA,levels = c("inactive","moderat", "active", "very active"), labels = c(0,1,2,3)) SLEsubset$PGA <- as.numeric(SLEsubset$PGA) #SLEsubset$PGA #1=inactive, 2=moderate, 3= active, 4= very active # activity defined as PGA >= 1 (>=moderate) & SLEDAI >= 6 SLEsubset$activity <- ifelse(SLEsubset$SELENA>=6 & SLEsubset$PGA >=2, yes=1,no=0) #because 2=moderatel #SLEsubset$activity_1 <- factor(SLEsubset$activity_1, levels = c(0,1), #labels=c("inactive", "active")) table(SLEsubset$activity) ``` adding Categorical predictors (Cutoffs) to data.frame "SLEsubset" ```{r Cutoff} SLEsubset$AntiC1qpos. <- ifelse(SLEsubset$antiC1q >60, yes=1, no=0) SLEsubset$A08pos. <- ifelse(SLEsubset$A08 >160, yes=1, no=0) SLEsubset$A08shiftpos. <- ifelse(SLEsubset$A08shift>100, yes=1, no=0) ``` ```{r} #A15 former called A08 #multivariate logistic regression multi_logi_A08_acti_DDXBE <- glm(A08pos.~ DDDXBE + activity, data = SLEsubset, family = "binomial") summary(multi_logi_A08_acti_DDXBE) exp(multi_logi_A08_acti_DDXBE$coefficients) exp(confint(multi_logi_A08_acti_DDXBE)) #graphical presentation of multivariate regression =fitting two regression models with the same slope for Disease Duration, stratified by activity (two Activity-stratified lines) # create plot plot(SLEsubset$DDDXBE, SLEsubset$A08pos.,type="n", main = "anti-A15", cex.main=2, xlab="Disease Duration (Diagnosis - Blood Sampling) [Years]", cex.lab=1.5, ylab="Probability of anti-A15 Positivity") #add line for patients with inactive Disease curve(plogis( (multi_logi_A08_acti_DDXBE$coefficients[1]) + # Intercept inactive Disease (multi_logi_A08_acti_DDXBE$coefficients[2])* x), # Slope 0, 60, add=TRUE, lwd=1, lty=1) #add line for patients with active Disease curve(plogis( (multi_logi_A08_acti_DDXBE$coefficients[1]) + (multi_logi_A08_acti_DDXBE$coefficients[3]) + #Intercept active Disease (multi_logi_A08_acti_DDXBE$coefficients[2])*x), #slope 0, 60, add=TRUE, lwd=3, lty=3) legend(0, 0.90, c("active Disease", "inactive Disease"), lty=c(3,1), lwd=c(3,1), bty="n", cex=1.5) #measure how much the variance of a regression coefficient is inflated due to multicollinearity in the model by variance inflation factor (or VIF) vif(multi_logi_A08_acti_DDXBE) # very low -> no evidence of multicollinerarity ``` ```{r} #A09 former called A08shift #multivariate logistic regression library(car) #vif function multi_logi_A08shift_acti_DDXBE <- glm(A08shiftpos.~ DDDXBE + activity, data = SLEsubset, family = "binomial") summary(multi_logi_A08shift_acti_DDXBE) exp(multi_logi_A08shift_acti_DDXBE$coefficients) exp(confint(multi_logi_A08shift_acti_DDXBE)) #graphical presentation of multivariate regression =fitting two regression models with the same slope for Disease Duration, stratified by activity (two Activity-stratified lines) # create plot plot(SLEsubset$DDDXBE, SLEsubset$A08shiftpos.,type="n", main = "anti-A09", cex.main=2, xlab="Disease Duration (Diagnosis - Blood Sampling) [Years]", cex.lab=1.5, ylab="Probability of anti-A09 Positivity") #add line for patients with inactive Disease curve(plogis( (multi_logi_A08shift_acti_DDXBE$coefficients[1]) + # Intercept inactive Disease (multi_logi_A08shift_acti_DDXBE$coefficients[2])* x), # Slope 0, 60, add=TRUE, lwd=1, lty=1) #add line for patients with active Disease curve(plogis( (multi_logi_A08shift_acti_DDXBE$coefficients[1]) + (multi_logi_A08shift_acti_DDXBE$coefficients[3]) + #Intercept active Disease (multi_logi_A08shift_acti_DDXBE$coefficients[2])*x), #slope 0, 60, add=TRUE, lwd=3, lty=3) legend(0, 0.90, c("active Disease", "inactive Disease"), lty=c(3,1), lwd=c(3,1), bty="n", cex=1.5) vif(multi_logi_A08shift_acti_DDXBE) # very low -> no evidence of multicollinerarity ``` ```{r} multi_logi_antiC1q_acti_DDXBE <- glm(AntiC1qpos.~ DDDXBE + activity, data = SLEsubset, family = "binomial") summary(multi_logi_antiC1q_acti_DDXBE) exp(multi_logi_antiC1q_acti_DDXBE$coefficients) exp(confint(multi_logi_antiC1q_acti_DDXBE)) #plot with two Activity-stratified lines plot(SLEsubset$DDDXBE, SLEsubset$AntiC1qpos.,type="n", main = "anti-C1q", cex.main=2, xlab="Disease Duration (Diagnosis-Blood-Sampling) [Years]", cex.lab=1.5, ylab="Probability of anti-C1q Positivity") curve(plogis( (multi_logi_antiC1q_acti_DDXBE$coefficients[1]) + (multi_logi_antiC1q_acti_DDXBE$coefficients[2])* x), 0, 60, add=TRUE, lwd=1, lty=1) curve(plogis( (multi_logi_antiC1q_acti_DDXBE$coefficients[1])+ (multi_logi_antiC1q_acti_DDXBE$coefficients[3]) + (multi_logi_antiC1q_acti_DDXBE$coefficients[2])*x), 0, 60, add=TRUE, lwd=3, lty=3) legend(0, 0.90, c("active Disease", "inactive Disease"), lty=c(3,1), lwd=c(3,1), bty="n", cex=1.5) vif(multi_logi_antiC1q_acti_DDXBE ) # very low -> no evidence of multicollinerarity ```