import numpy as np
import pandas as pd
import statsmodels.api as sm
from statistics import stdev
data_df = pd.read_csv("rhc.csv")
cols =["cat1","sex","race","edu","income","resp","card","neuro","gastr","renal","meta","hema","seps","trauma","ortho","das2d3pc","dnr1","ca","surv2md1","aps1","scoma1","wtkilo1","temp1","resp1","hrt1","pafi1","paco21","ph1","wblc1","hema1","sod1","pot1","crea1","bili1","alb1","cardiohx","chfhx","dementhx","psychhx","chrpulhx","renalhx","liverhx","gibledhx","immunhx","transhx","amihx","age","meanbp1"]
categorical_columns =["cat1","sex","race","edu","income","ca","dnr1","resp","card","neuro","gastr","renal","meta","hema","seps","trauma","ortho"]# 交絡因子
X = data_df[cols]
dummy = pd.get_dummies(X[categorical_columns], drop_first=True)
X = pd.concat([X, dummy], axis=1)
X = X.drop(categorical_columns, axis=1)
X.loc[:,"Intercept"]=1# 処置群とコントロール群
D = pd.get_dummies(data_df["swang1"])["RHC"]# アウトカム
Y = pd.get_dummies(data_df["death"])["Yes"]# 傾向スコア
model = sm.Logit(D, X).fit(method="newton")
e = model.predict(X)
# マッチング
target_list = e[e.index.isin(D[D==1].index)]
ate =0
k =0for i in D[D==0].index:
num = e[e.index.isin(D[D==0].index)][i]
idx = np.abs(np.asarray(target_list)- num).argmin()if np.abs(target_list.iloc[idx]- num)> stdev(e)/100:continue
ate +=int(Y.loc[target_list.index[idx]])-int(Y.loc[i])
k +=1
ate = ate / k
# 層別化
interval = np.arange(0,1.05,0.05)
ate =0for i inrange(0,len(interval)-1):
temp0 = e[(e.index.isin(D[D==0].index))&(e>=interval[i])&(e<interval[i+1])].index
temp1 = e[(e.index.isin(D[D==1].index))&(e>=interval[i])&(e<interval[i+1])].index
if(len(temp0)>0)&(len(temp1)>0):
ate +=(Y[temp1].mean()- Y[temp0].mean())*(len(temp1)+len(temp0))
ate = ate /len(e)
# カーネルマッチング
f = sm.Logit(Y[Y.index.isin(D[D==0].index)], e[e.index.isin(D[D==0].index)]).fit(method="newton")
Y_hat = f.predict(e[e.index.isin(D[D==1].index)])
ate =(Y[Y.index.isin(D[D==1].index)]- Y_hat).sum()/len(Y_hat)
# DR
model0 = sm.Logit(Y[Y.index.isin(D[D==0].index)], X[X.index.isin(D[D==0].index)]).fit(method="ncg")
Y_hat0 = model0.predict(X)
model1 = sm.Logit(Y[Y.index.isin(D[D==1].index)], X[X.index.isin(D[D==1].index)]).fit(method="ncg")
Y_hat1 = model1.predict(X)
ate =((D * Y / e -(1-(D / e)* Y_hat1)).sum()-((1-D)* Y /(1-e)-(1-((1-D)/(1-e))* Y_hat0)).sum())/len(Y)
# OW
ato =(D *(1-e)* Y).sum()/(D *(1-e)).sum()-((1-D)* e * Y).sum()/((1-D)* e).sum()