Data analysis of the ASPC Paper “Social Health Insurance Inequalities in China” – RStudio Scripts

#RStudio setting:

rm(list=ls()) ## remove objects from workplace
setwd("D:/R/leedsdata") ## set workdirectory
getwd()

# Import the data

library(readxl)
## readxl_example()
## ??readxl
healthcare<-read_excel("Healthcare2014.xlsx", "Sheet1")
head(healthcare)
str(healthcare)
sum(is.na(healthcare))
save(healthcare,file = "healthcare.Rda")

# Select variables

library(dplyr)
load("healthcare.Rda")
healthcare01<- dplyr::select (healthcare, provcode,
Ab01,Ac01, Ad01,Ae01,Ag01,Ah001,Ai01,Aj01,Ah101,Ah201,Ah301,Ah401,Ah501,Ah601,Ah701,Ah801,
B01_1,B02_1,B07_1,B07_1,B07_2,B07_3,B07_4,B07_5,B07_6,B07_7,B07_8,B07_9,
D01,D06,
F01,F02,F021,F022,F023,F024,F025,F031,F041,F042,F043,F04C,F03,F06)
useBytes = TRUE
str(healthcare01)
save(healthcare01,file = "healthcare01.Rda")
load("healthcare01.Rda")
library(dplyr)
h01<-filter(healthcare01,Ab01>0&Ac01>0&Ae01>0&Ag01>0&Ai01>0&Ah001>=0&Ah101>=0&Ah201>=0&Ah301>=0&Ah401>=0&Ah501>=0&Ah601>=0&Ah701>=0&Ah801>=0&F01>0&D01>=0&F01>0&F03>0)
h01<-filter(h01,B01_1>0|B01_1< -7)
h01<-filter(h01,F041< -7 |F041>= 0)
h01<-filter(h01,F042< -7 |F042>= 0)
sum(is.na(h01))
str(h01)

# Rename variables

## province
h01$provcode[h01$provcode== 13]<- "Hebei"
h01$provcode[h01$provcode== 23]<- "Heilongjiang"
h01$provcode[h01$provcode== 31]<- "Shanghai"
h01$provcode[h01$provcode== 33]<- "Zhejiang"
h01$provcode[h01$provcode== 34]<- "Anhui"
h01$provcode[h01$provcode== 44]<- "Guangdong"
h01$provcode[h01$provcode== 51]<- "Sichuan"
h01$provcode[h01$provcode== 61]<- "Shannxi"
h01$province<-h01$provcode

## age, ##groups
h01<-mutate(h01,age=(2014 - Ab01))
summary(h01$age)

h01$age[h01$age > 60] <- "elderly"
h01$age[h01$age > 40 & h01$age <= 60] <- "middle_aged"
h01$age[h01$age > 18 & h01$age <= 40] <- "young"
h01$age[h01$age <= 18] <- "minors"

summary(h01$age)

table(h01$Ac01)
## female,
h01$female<-ifelse(h01$Ac01=="2",1,0)
table(h01$female)

## hukou,
h01$hukou<- h01$Ae01
h01$hukou[h01$hukou==1|h01$hukou==3] <- "urban_local"
h01$hukou[h01$hukou==2|h01$hukou==4] <- "rural_local"
h01$hukou[h01$hukou==5] <- "urban_migrant"
h01$hukou[h01$hukou==6] <- "rural_migrant"
table(h01$hukou)

## illness,
h01$illness<-h01$Ag01
h01$illness[h01$illness==1] <- "no"
h01$illness[h01$illness==2] <- "ill_able"
h01$illness[h01$illness==3|h01$illness==4] <- "ill_disable"
table(h01$illness)

## med_insurance,
table(h01$Ah401)
h01$med_insurance<-h01$Ah001
h01$med_insurance[h01$med_insurance== "0"] <- "no"
h01$med_insurance[h01$med_insurance== "1"] <- "one"
h01$med_insurance[h01$med_insurance== "2"|h01$med_insurance== "3"|h01$med_insurance== "4"] <- "multi"
table(h01$med_insurance)

## medical insurance schemes
## schemes
h01$uebmi<-ifelse(h01$Ah101== "1", 1,0)
h01$urbmi<-ifelse(h01$Ah201== "1", 1,0)
h01$critical_illness<-ifelse(h01$Ah301== "1", 1,0)
h01$free_medicare<-ifelse(h01$Ah401== "1", 1,0)
h01$private<-ifelse(h01$Ah501== "1", 1,0)
h01$nrcmi<-ifelse(h01$Ah601== "1", 1,0)
h01$secondary_reimbursement<-ifelse(h01$Ah701== "1", 1,0)
h01$no_insurance<-ifelse(h01$Ah801== "1", 1,0)
table(h01$secondary_reimbursement)


## education,
h01$education<- h01$Ai01
levels(h01$education) <- c(levels(h01$education),"no_edu", "primary","middle","senior","vocational_college","bachelor","postgraduate")
h01$education[h01$education==1] <- "no_edu"
h01$education[h01$education==2|h01$education==3] <- "primary"
h01$education[h01$education==4|h01$education==5] <- "middle"
h01$education[h01$education==6|h01$education==7] <- "senior"
h01$education[h01$education==8] <- "vocational_college"
h01$education[h01$education==9] <- "bachelor"
h01$education[h01$education==10] <- "postgraduate"
table(h01$education)

## occupation,
h01$occupation<- h01$B01_1
h01$occupation[h01$occupation==4|h01$occupation==5|h01$occupation==6] <- "public_servant"
h01$occupation[h01$occupation==7] <- "state_owned"
h01$occupation[h01$occupation==8] <- "private_enterprise"
h01$occupation[h01$occupation==9] <- "transnational_corporation"
h01$occupation[h01$occupation==10] <- "NPO"
h01$occupation[h01$occupation==11] <- "small_business"
h01$occupation[h01$occupation==12] <- "self_employment"
h01$occupation[h01$occupation==13] <- "casual_workers"
h01$occupation[h01$occupation==14|h01$occupation== -9|h01$occupation== -8] <- "other"
h01$occupation[h01$occupation==1|h01$occupation==2|h01$occupation== 3] <- "famers"
table(h01$occupation)

## health care services utilisation

h01$health_u<-h01$F01
h01$health_u[h01$health_u== 1] <- "slef_purchase"
h01$health_u[h01$health_u==2] <- "village_clinics"
h01$health_u[h01$health_u==3] <- "community_health_centre"
h01$health_u[h01$health_u==4] <- "township_health_centre"
h01$health_u[h01$health_u==5] <- "district_county_hospital"
h01$health_u[h01$health_u==6] <- "general_hospital"
table(h01$health_u)

## healthcare services trust
h01$health_t<-h01$F02
h01$health_t[h01$health_t==1] <- "public_hospital"
h01$health_t[h01$health_t==2] <- "private_hospital"
h01$health_t[h01$health_t==3] <- "all_none"
h01$health_t[h01$health_t==4] <- "all_trust"
h01$health_t[h01$health_t==5|h01$health_t== "-1"] <- "unclear"
table(h01$health_t)
str(h01)
library(ggplot2)
ggplot(data = h01) +
geom_bar(mapping = aes(x = h01$health_t, y=..prop.., group = 1))

## hospital_absence, should be hospitalised but accutally not, 1 means it occurred
h01$hospital_absence<-h01$F03
h01$hospital_absence<-ifelse(h01$hospital_absence=="1", 1,0)
table(h01$hospital_absence)

## absence_reason, who are not hospitalised with economic reasons
h01$absence_reason<-h01$F031
h01$absence_reason[h01$absence_reason== 1]<-"no_money"
h01$absence_reason[h01$absence_reason== 2]<-"no_time"
h01$absence_reason[h01$absence_reason== 3]<-"beds_unavailabe"
h01$absence_reason[h01$absence_reason== 4]<-"poor_conditions"
h01$absence_reason[h01$absence_reason== 5]<-"untrustworthy_doctors"
h01$absence_reason[h01$absence_reason== 6]<-"not_necessary"
h01$absence_reason[h01$absence_reason== 7]<-"other"

table(h01$absence_reason)

## medcover, medical cost covered by the insurance, percent level;F041 = actual medical cost, F042 = actual medical cover amount;
h01$F041[h01$F041== -9| h01$F041== -8] <- 0
h01$F042[h01$F042== -9| h01$F042== -8] <- 0
h01<-mutate(h01,medcover=F042/(F041+F042))
sum(is.na(h01$medcover))
summary(h01$medcover)

## income, ## this variable is originally ordinal data,
## values are: low< 30,000; 30,000<m_low<80,000; 80,000<middle<150,000; 150,000<m_high<250,000; high>250,000.
h01$income<-h01$D01
h01$income[h01$income==1|h01$income==2] <- "low"
h01$income[h01$income==3|h01$income== 4] <- "m_low"
h01$income[h01$income==6|h01$income== 5] <- "middle"
h01$income[h01$income==7] <- "m_high"
h01$income[h01$income==9|h01$income == 8] <- "high"
h01$income_level<-h01$income
table(h01$income_level)
h01$distrust1<-h01$F021
h01$distrust2<-h01$F022
h01$distrust3<-h01$F023
h01$distrust4<-h01$F024
h01$distrust5<-h01$F025

save(h01,file = "h01.Rda")
 

# Select variables and change the property

load(“h01.Rda”)
h02<- dplyr::select (h01,province,age,female,hukou,illness,med_insurance, uebmi,urbmi,critical_illness, free_medicare,private,nrcmi,secondary_reimbursement,no_insurance,education,occupation,health_u,health_t,hospital_absence, absence_reason,medcover, income, distrust1,distrust2,distrust3,distrust4,distrust5)


h02[sapply(h02, is.character)] <- lapply(h02[sapply(h02, is.character)], as.factor)

levels(h02$age)
h02$age <-factor(h02$age, order=TRUE,levels=c(“minors”, “young”, “middle_aged”, “elderly”))
levels(h02$age)

##  urban_local
levels(h02$hukou)
h02$hukou <- relevel(h02$hukou, “urban_local”)
levels(h02$hukou)

## 
levels(h02$occupation)
h02$occupation<- relevel(h02$occupation, “other”)
levels(h02$occupational_class)

## 
levels(h02$education)
h02$education <-factor(h02$education, order=TRUE,levels=c(“no_edu”, “primary”, “middle”,”senior”,”diploma”, “bachelor”,”postgraduate”))
levels(h02$education)

##
levels(h02$illness)
h02$illness <- relevel(h02$illness, “no”)
levels(h02$illness)


## 
levels(h02$income)
h02$income<-factor(h02$income, order=TRUE,levels=c(“low”, “m_low”, “middle”, “m_high”, “high”))
save(h02,file = “h02.Rda”)
str(h02)

# Analyses- Multilevel-linear regression and multilevel-logistic regression models

library(dplyr)
summary(is.na(h02$medcover))
h02<-na.omit(h02, cols="medcover")
h03<-dplyr::select (h02,province,female,age,hukou,edu,inc,occ,med_type,nrcmi,private,uebmi,urbmi, no_ins,h_s_p,self_med,PHNs,tertiary_h,medcover)
str(h03)
save(h03,file = "h03.Rda")

load("h03.Rda")
contrasts(h03$age) <- contr.treatment(4)
contrasts(h03$edu) <- contr.treatment(4)
contrasts(h03$inc) <- contr.treatment(5)
library(lme4)
library(lmerTest)
model2<-lmer(medcover ~ age+female+hukou+edu+inc+occ+nrcmi+private+uebmi+urbmi+no_ins + (1 | province),
data=h03)
summary(model2)
ranef(model2)

load("h02.Rda")
library(lme4)
m5<-glmer(self_medication ~ age + hukou + female + married + illness + education + income_level + occupational_class + perceived_med + nrcmi + uebmi + urbmi + private + noscheme + (1 + hukou |provcode),data=h02, family = binomial, control = glmerControl(optimizer = "bobyqa"),
nAGQ = 0)
summary(m5)

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s