R Script for the data analysis of the Conference Paper “Social Health Insurance & Inequality in China”

rm(list=ls()) ## remove objects from workplace
setwd("D:/R/leedsdata") ## set workdirectory
getwd()

## Import the data

library(tidyverse)
library(readxl)
healthcare<-read_excel("Healthcare2014.xlsx", "Sheet1")
head(healthcare)
str(healthcare)
sum(is.na(healthcare))
save(healthcare,file = "healthcare.Rda")

## Select variables

library(dplyr)
load("healthcare.Rda")
healthcare01<- dplyr::select (healthcare, provcode,
Ab01,Ac01, Ad01,Ae01,Ag01,Ah001,Ai01,Aj01,Ah101,Ah201,Ah301,Ah401,Ah501,Ah601,Ah701,Ah801,
B01_1,B02_1,B07_1,B07_1,B07_2,B07_3,B07_4,B07_5,B07_6,B07_7,B07_8,B07_9,
D01,D06,D07,
F01,F031,F041,F042,F04C,F03,F05,F06)
useBytes = TRUE
str(healthcare01)
save(healthcare01,file = "healthcare01.Rda")
load("healthcare01.Rda")
library(dplyr)
h01<-filter(healthcare01,Ab01>0&Ac01>0&Ae01>0&Ag01>0&Ai01>0&Aj01>0&Ah001>=0&Ah101>=0&Ah201>=0&Ah301>=0&Ah401>=0&Ah501>=0&Ah601>=0&Ah701>=0&Ah801>=0&F01>0&F06>=0&D06>=0&D01>=0&F01>0&F03>0&D07!= -1&Ad01!= -1)
h01<-filter(h01,B01_1>0|B01_1< -7) h01<-filter(h01,F041< -7 |F041>= 0)
h01<-filter(h01,F042< -7 |F042>= 0)
sum(is.na(h01))
str(h01)

## Rename variables

h01$provcode[h01$provcode== 13]<- "Hebei"
h01$provcode[h01$provcode== 23]<- "Heilongjiang"
h01$provcode[h01$provcode== 31]<- "Shanghai"
h01$provcode[h01$provcode== 33]<- "Zhejiang"
h01$provcode[h01$provcode== 34]<- "Anhui"
h01$provcode[h01$provcode== 44]<- "Guangdong"
h01$provcode[h01$provcode== 51]<- "Sichuan"
h01$provcode[h01$provcode== 61]<- "Shannxi"
h01<-mutate(h01,age=(2014 - Ab01))
summary(h01$age)
h01$age[h01$age > 60] <- "elderly" h01$age[h01$age > 40 & h01$age <= 60] <- "middle_aged" h01$age[h01$age > 18 & h01$age <= 40] <- "young"
h01$age[h01$age <= 18] <- "minors"
summary(h01$age)
table(h01$Ac01)
h01$female<-ifelse(h01$Ac01=="2",1,0)
table(h01$female)
h01$married<-ifelse(h01$Ad01=="3"|h01$Ad01=="4"|h01$Ad01=="5"|h01$Ad01=="6",1,0)
table(h01$married)
h01$hukou<- h01$Ae01
h01$hukou[h01$hukou==1|h01$hukou==3] <- "urban_local"
h01$hukou[h01$hukou==2|h01$hukou==4] <- "rural_local"
h01$hukou[h01$hukou==5] <- "urban_migrant"
h01$hukou[h01$hukou==6] <- "rural_migrant"
table(h01$hukou)
h01$illness<-h01$Ag01
h01$illness[h01$illness==1] <- "no"
h01$illness[h01$illness==2] <- "ill_able"
h01$illness[h01$illness==3|h01$illness==4] <- "ill_disable"
table(h01$illness)
table(h01$Ah401)
h01$med_insurance<-h01$Ah001
h01$med_insurance[h01$med_insurance== "0"] <- "no"
h01$med_insurance[h01$med_insurance== "1"] <- "one"
h01$med_insurance[h01$med_insurance== "2"|h01$med_insurance== "3"|h01$med_insurance== "4"] <- "multi"
table(h01$med_insurance)
h01$urbmi<-ifelse(h01$Ah201== "1"|h01$Ah301== "1", 1,0)
h01$uebmi<-ifelse(h01$Ah101== "1"|h01$Ah401== "1", 1,0)
h01$nrcmi<-ifelse(h01$Ah601== "1"|h01$Ah701== "1", 1,0)
h01$private<-ifelse(h01$Ah501== "1", 1,0)
h01$no_insurance<-ifelse(h01$Ah801== "1", 1,0)
table(h01$nrcmi)
h01$medi_types<- h01$nrcmi
h01$medi_types[h01$urbmi==1] <- "urbmi"
h01$medi_types[h01$uebmi==1] <- "uebmi"
h01$medi_types[h01$nrcmi==1] <- "nrcmi"
h01$medi_types[h01$private==1] <- "private"
h01$medi_types[h01$no_insurance==1] <- "no_insurance"
table(h01$medi_types)
h01$employment<- h01$Aj01
h01$employment[h01$employment== "1"] <- "employed"
h01$employment[h01$employment== "3"|h01$employment== "4"] <- "student"
h01$employment[h01$employment== "2"] <- "unemployed"
table(h01$employment)
h01$education<- h01$Ai01
levels(h01$education) <- c(levels(h01$education), "primary","middle","secondary","university")
h01$education[h01$education==1|h01$education==2|h01$education==3] <- "primary"
h01$education[h01$education==4|h01$education==5] <- "middle"
h01$education[h01$education==6|h01$education==7] <- "secondary"
h01$education[h01$education==8|h01$education==9|h01$education==10] <- "colleges"
table(h01$education)
h01$occupational_class<- h01$B01_1
h01$occupational_class[h01$occupational_class==4|h01$occupational_class==5|h01$occupational_class==6|h01$occupational_class==7] <- "state_owned"
h01$occupational_class[h01$occupational_class==8|h01$occupational_class==9|h01$occupational_class== 10] <- "private_sector"
h01$occupational_class[h01$occupational_class== 11|h01$occupational_class== 12|h01$occupational_class== 13|h01$occupational_class== 14] <- "informal_sector"
h01$occupational_class[h01$occupational_class==1|h01$occupational_class==2|h01$occupational_class== 3] <- "rural_sector"
h01$occupational_class[h01$occupational_class== -9|h01$occupational_class== -8] <- "unemployed"
h01$occupational_class[h01$employment=="student"]<-"student"
table(h01$occupational_class)
h01$health_use<-h01$F01
h01$health_use[h01$health_use==1] <- "self_medication"
h01$health_use[h01$health_use==2|h01$health_use==3|h01$health_use==4|h01$health_use==5] <- "PHNs"
h01$health_use[h01$health_use==6] <- "tertiary_hospital"
table(h01$health_use)
h01$health_seeking<-h01$health_use
h01$self_medication<-ifelse(h01$F01==1, 1,0)
h01$PHNs<-ifelse(h01$F01==2|h01$F01==3|h01$F01==4|h01$F01==5, 1,0)
h01$tertiary_hospital<-ifelse(h01$F01==6, 1,0)
table(h01$self_medication)
h01$hospital_lose<-h01$F03
h01$hospital_lose<-ifelse(h01$hospital_lose=="1", 1,0)
table(h01$hospital_lose)
h01$hospital_lose_eco<-h01$F031
h01$hospital_lose_eco<-ifelse(h01$hospital_lose_eco== "1" & h01$hospital_lose== "1", 1,0)
table(h01$hospital_lose_eco)
h01$perceived_med<-h01$D07
h01$perceived_med<-ifelse(h01$perceived_med== "7", 1,0)
table(h01$perceived_med)

## Medcover, medical cost covered by the insurance, percent level;F041 = actual medical cost, F042 = actual medical cover amount;

h01$F041[h01$F041== -9| h01$F041== -8] <- 0
h01$F042[h01$F042== -9| h01$F042== -8] <- 0
h01<-mutate(h01,medcover=F042/(F041+F042))
sum(is.na(h01$medcover))
summary(h01$medcover)
h01$income<-h01$D01
h01$income[h01$income==1|h01$income==2] <- "low"
h01$income[h01$income==3|h01$income== 4] <- "m_low"
h01$income[h01$income==6|h01$income== 5] <- "middle"
h01$income[h01$income==7] <- "m_high"
h01$income[h01$income==9|h01$income == 8] <- "high"
h01$income_level<-h01$income
table(h01$income_level)
save(h01,file = "h01.Rda")

## Select variables and change the property

load("h01.Rda")
h02<- dplyr::select (h01,provcode,perceived_med,hospital_lose_eco, hospital_lose,health_seeking,self_medication,PHNs,tertiary_hospital, income_level,occupational_class,education,employment,medcover,nrcmi,private,uebmi,urbmi, no_insurance, med_insurance,illness,hukou,married,female,age,medi_types)
h02[sapply(h02, is.character)] <- lapply(h02[sapply(h02, is.character)], as.factor)
Set dummy variables
levels(h02$age)
h02$age <-factor(h02$age, order=TRUE,levels=c("minors", "young", "middle_aged", "elderly"))
levels(h02$age)
levels(h02$hukou)
h02$hukou <- relevel(h02$hukou, "urban_local")
levels(h02$hukou)
levels(h02$occupational_class)
h02$occupational_class <- relevel(h02$occupational_class, "unemployed")
levels(h02$occupational_class)
levels(h02$education)
h02$education <-factor(h02$education, order=TRUE,levels=c("primary", "middle", "secondary", "colleges"))
levels(h02$education)
levels(h02$illness)
h02$illness <- relevel(h02$illness, "no")
levels(h02$illness)
levels(h02$health_seeking)
h02$health_seeking<- relevel(h02$health_seeking, "self_medication")
levels(h02$health_seeking)
levels(h02$medi_types)
h02$medi_types<- relevel(h02$medi_types, "no_insurance")
levels(h02$medi_types)
levels(h02$income_level)
h02$income_level<-factor(h02$income_level, order=TRUE,levels=c("low", "m_low", "middle", "m_high", "high"))
save(h02,file = "h02.Rda")
str(h02)

## Make the data-frames ready to use

library(data.table)
setnames(h02, old=c("provcode","self_medication","tertiary_hospital","income_level","occupational_class","education","no_insurance","medi_types", "health_seeking"), new=c("province", "self_med","tertiary_h","inc","occ","edu","no_ins","med_type","h_s_p"))
h04<- dplyr::select (h02,province,female,age,hukou,edu,inc,occ,med_type,nrcmi,private,uebmi,urbmi, no_ins,h_s_p,self_med,PHNs,tertiary_h)
h04[sapply(h04, is.numeric)] <- lapply(h04[sapply(h04, is.numeric)], as.factor)
summary(h04)
library(dplyr)
summary(is.na(h02$medcover))
h02<-na.omit(h02, cols="medcover")
h03<-dplyr::select (h02,province,female,age,hukou,edu,inc,occ,med_type,nrcmi,private,uebmi,urbmi, no_ins,h_s_p,self_med,PHNs,tertiary_h,medcover)
str(h03)
save(h03,file = "h03.Rda")

## Analyses- Multilevel-linear regression and multilevel-logistic regression models

phm<-table(h04$hukou,h04$med_type)
prop.table(phm,1)
library(ggplot2)
ggplot(data = h04,
mapping = aes(x= hukou,
y= med_type,fill= med_type))+
geom_bar(stat = "identity")+
labs(x= "Hukou types", y= "Medical insurance types", fill = "Medical insurance types")
if (!require("pacman")) install.packages("pacman")
pacman::p_load(sjPlot, sjlabelled, sjmisc, ggplot2)
load("h03.Rda")
contrasts(h03$age) <- contr.treatment(4)
contrasts(h03$edu) <- contr.treatment(4)
contrasts(h03$inc) <- contr.treatment(5)
library(lme4)
library(lmerTest)
model2<-lmer(medcover ~ age+female+hukou+edu+inc+occ+med_type + (1 | province),
data=h03)
summary(model2)
dotplot(ranef(model2,condVar=TRUE))
model2.1<-lmer(medcover ~ hukou+edu+inc+occ+med_type + (1 | province),
data=h03)
summary(model2.1)
plot_model(model2.1, vline.color = "red",show.values = TRUE, value.offset = .5)
contrasts(h04$age) <- contr.treatment(4)
contrasts(h04$edu) <- contr.treatment(4)
contrasts(h04$inc) <- contr.treatment(5)
library(lme4)
model1<-glmer(self_med ~ age + female + edu + inc + occ + med_type + hukou + (1 | province), data = h04, family = binomial, control = glmerControl(optimizer = "bobyqa"),
nAGQ = 0)
summary(model1)
dotplot(ranef(model1,condVar=TRUE))
model1.1<-glmer(self_med ~ hukou + edu + inc + occ + med_type + (1 | province), data = h04, family = binomial, control = glmerControl(optimizer = "bobyqa"),
nAGQ = 0)
summary(model1.1)
plot_model(model1.1, vline.color = "red")
plot_model(model1.1, transform = "plogis",show.values = TRUE, value.offset = .5)
model3<-glmer(PHNs ~ age + female + hukou + edu + inc + occ + med_type + (1 | province), data = h04, family = binomial, control = glmerControl(optimizer = "bobyqa"),
nAGQ = 0)
summary(model3)
dotplot(ranef(model3,condVar=TRUE))
model3.1<-glmer(PHNs ~ hukou + edu + occ + med_type + (1 | province), data = h04, family = binomial, control = glmerControl(optimizer = "bobyqa"),
nAGQ = 0)
summary(model3.1)
plot_model(model3.1, vline.color = "red")
plot_model(model3.1, transform = "plogis", show.values = TRUE, value.offset = .5)
model4<- glmer(tertiary_h ~ age + female + hukou + edu + inc + occ + med_type + (1 | province), data = h04, family = binomial, control = glmerControl(optimizer = "bobyqa"),
nAGQ = 0)
summary(model4)
dotplot(ranef(model4,condVar=TRUE))
model4.1<- glmer(tertiary_h ~ hukou + edu + inc + occ + med_type + (1 | province), data = h04, family = binomial, control = glmerControl(optimizer = "bobyqa"),
nAGQ = 0)
summary(model4.1)
plot_model(model4.1, vline.color = "red")
plot_model(model4.1, transform = "plogis",show.values = TRUE, value.offset = .5)

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s