列联表分析
发生 | 未发生 | 合计 | |
---|---|---|---|
对照组 | a | b | \(N_0\) |
试验组 | c | d | \(N_1\) |
率比(Risk Ratio),也称相对危险度(Relative Risk): \(RR=\frac{a/N_0}{c/N_1}\),相对量,这里表示对照组发生事件是试验组发生事件的多少倍。
率差(Rate Difference): \(RD=\frac{a}{N_0}-\frac{c}{N_1}\),绝对量,率差能够直观的看出疗效的大小,可理解为试验组比对照组多百分之多少的可能发生事件。
优势比,也称比值比(Odds Ratio):\(OR=\frac{a/b}{c/d}\),这里反映了组别之间发生事件的关联强度。
率差
假设样本均值等于总体均值,其置信区间以均值为中心分布在其两侧。方法:正态近似法。
data test1;
gp="A";r=0;n=13;output;
gp="A";r=1;n=48;output;
gp="B";r=0;n=8;output;
gp="B";r=1;n=56;output;
run;
**** Method 1 ****;
proc freq data=test1;
tables gp*r/ riskdiff(cl=exact);
exact riskdiff;
weight n/ zeros;
run;
假设样本均值不等于总体均值,样本出现在总体分布的置信区间内。方法:Miettinen and Nurminen.
试验组与对照组的比率不是都为100%或0%(MN方法)
data test1;
gp="A";r=0;n=13;output;
gp="A";r=1;n=48;output;
gp="B";r=0;n=8;output;
gp="B";r=1;n=56;output;
run;
**** Method 1 ****;
proc freq data=test1;
tables gp*r/ riskdiff(cl=mn);
weight n/ zeros;
ods output pdiffcls=mn;
run;
试验组与对照组的比率都为100%或0%(MN方法)
%macro ratediff(n1=,n1_event=,n2=,n2_event=,alpha=,side=);
data mienur;
n1=&n1.;
a1=&n1_event.;
a2=n1-a1;
p1=a1/n1;
n2=&n2.;
a3=&n2_event.;
a4=n2-a3;
p2=a3/n2;
z=probit(1-&alpha./&side.);
d=p1-p2;
**率差置信区间下限;
**率差置信区间上限;
l_diff=-(z**2*(a1+a3)/((a1+a3-1)*a1))/((z**2*(a1+a3)/((a1+a3-1)*a1))+1);
u_diff= (z**2*(a1+a3)/((a1+a3-1)*a3))/((z**2*(a1+a3)/((a1+a3-1)*a3))+1);
run;
%mend ratediff;
%ratediff(n1=100,n1_event=100,n2=100,n2_event=100,alpha=0.05,side=2);
分层的Miettinen-Nurminen方法计算率差与P值
rrv(c0,c1,s0,s1,z,rd,ra,rb,vv);/*根据rd计算r0和r1*/
%macro =&c0.;
c0=&c1.;
c1=&s0.;
s0=&s1.;
s1=c1+c0;
c=s1+s0;
s=c1/s1;
p1=c0/s0;
p0=&z.**2;
z2=&rd.;
rd
=s;
l3=(s1+2*s0)*rd-s-c;
l2=(s0*rd-s-2*c0)*rd+c;
l1=c0*rd*(1-rd);
l0
=l2**3/(3*l3)**3-l1*l2/(6*l3**2)+l0/(2*l3);
q=sign(q)*(l2**2/(3*l3)**2-l1/(3*l3))**0.5;
pif p=0 then aa=0;
else aa=q/p**3;
if abs(aa)>1 then aa=1;
=1/3*(pi+arcos(aa));
a=2*p*cos(a)-l2/(3*l3);
r0
=r0+rd;
r1=(r1*(1-r1)/s1+r0*(1-r0)/s0)*s/(s-1);
v&ra.=r0;
&rb.=r1;
&vv.=v;
%mend;
stra_mn(dat=,stra=,trt=,c=,n=,alp=,Wmethod=2,all=0,out=);
%macro
%if &stra.=0 %then %do;
=&dat. out=dat1;
proc sort data&trt.;
by
run;
data dat1x;=nob;
set dat1 nobs&trt.;
by ccc(2);
array nnn(2);
array : nnn:;
retain cccif first.&trt. then do;
+1;
iccc(i)=&c.;
nnn(i)=&n.;
end;if _N_=nob then do;
=ccc(1);
ca=ccc(2);
cb=nnn(1);
na=nnn(2);
nb
output;
end;
keep ca cb na nb;
run;
data dat2;
set dat1x;=probit(1-&alp./2);
z=constant("pi");
pi=ca/na;
pa=cb/nb;
pb
/*计算置信下限*/
=0;
str=0;
qxq=1 to 3;
do qcyc
=10**(-qcyc*2);
poit=10;
lmin=str+qxq*poit*100;
str
=-99 to 99;
do cyc=str+cyc*poit;
rdlxrrv(ca,cb,na,nb,z,rdlx,ra,rb,vv);
%if abs(vv)<10**(-12) then vv=0;
=pb-pa-(z*z*vv)**0.5; /*置信下限*/
rdxif vv^=0 then diff=abs(rdx-rdlx);
if diff<lmin then do;
=diff;
lmin=cyc;
qxq
end;if &all.=1 then output;
end;
end;=str+qxq*poit;
rdl
/*计算置信上限*/
=0;
str=0;
qxq=1 to 3;
do qcyc
=10**(-qcyc*2);
poit=10;
dmin=str+qxq*poit*100;
str
=-99 to 99;
do cyc=str+cyc*poit;
rdlxrrv(ca,cb,na,nb,z,rdlx,ra,rb,vv);
%if abs(vv)<10**(-12) then vv=0;
=pb-pa+(z*z*vv)**0.5; /*置信上限*/
rdxif vv^=0 then diff=abs(rdx-rdlx);
if diff<dmin then do;
=diff;
dmin=cyc;
qxq
end;if &all.=1 then output;
end;
end;=str+qxq*poit;
rdu
/*Z统计量*/
=0;
rdlxrrv(ca,cb,na,nb,z,rdlx,ra,rb,vv);
%if abs(vv)<10**(-16) then Zsc=0;
else Zsc=-abs((pb-pa)/vv**0.5);
=probnorm(Zsc)*2;
Pvalue
/*率差*/
=pb-pa;
rankdiff
output;
%if &all.=1 %then %do;
keep diff rdx rdlx rdl ra rb vv rdl dmin rdu lmin str qxq;
%end;%else %do;
keep rdl rdu dmin lmin Zsc Pvalue rankdiff ;
%end;
run;
%end;
/**************************************************/
/*分层MN*/
%else %do;
=&dat. out=dat1;
proc sort data&stra. &trt.;
by
run;
data _NULL_;=nob;
set dat1 nobscc(99);
array nn(99);
array : nn:;
retain cc&stra. &trt.;
by +1;
xcc(x)=&c.;
nn(x)=&n.;
if _N_=nob then do;
=x/2;
grp=1 to grp;
do isymputx("c"||strip(put(i,best.))||"_0",strip(put(cc(i*2-1),best.)));
call symputx("c"||strip(put(i,best.))||"_1",strip(put(cc(i*2),best.)));
call symputx("n"||strip(put(i,best.))||"_0",strip(put(nn(i*2-1),best.)));
call symputx("n"||strip(put(i,best.))||"_1",strip(put(nn(i*2),best.)));
call
end;symputx("Ntrt",strip(put(x/2,best.)));
call
end;
run;
data dat2;ca(&Ntrt.);
array cb(&Ntrt.);
array na(&Ntrt.);
array nb(&Ntrt.);
array pa(&Ntrt.);
array pb(&Ntrt.);
array ra(&Ntrt.);
array rb(&Ntrt.);
array vv(&Ntrt.);
array wfr(&Ntrt.);
array sw(&Ntrt.);
array =&Ntrt.;
ntrt%do i=1 %to &Ntrt.;
&i.=&&c&i._0.;
ca&i.=&&c&i._1.;
cb&i.=&&n&i._0.;
na&i.=&&n&i._1.;
nb
%end;=probit(1-&alp./2);
z=constant("pi");
pi=1 to ntrt;
do ipa(i)=ca(i)/na(i);
pb(i)=cb(i)/nb(i);
end;
/*率差*/
=0;
ew=1 to ntrt;
do i%if &wmethod.=1 %then %do;
wfr(i)=(na(i)+nb(i));
%end;%if &wmethod.=2 %then %do;
wfr(i)=1/(1/na(i)+1/nb(i));
%end;=ew+wfr(i);
ew
end;=0;
rra=0;
rrb=1 to ntrt;
do isw(i)=wfr(i)/ew;
=rra+sw(i)*pa(i);
rra=rrb+sw(i)*pb(i);
rrb
end;=rrb-rra;
rankdiff
/*计算置信下限*/
=0;
str=0;
qxq=1 to 3;
do qcyc
=10**(-qcyc*2);
poit=10;
lmin=str+qxq*poit*100;
str
=-99 to 99;
do cyc=str+cyc*poit;
rdlx
=1 to ntrt;
do irrv(ca(i),cb(i),na(i),nb(i),z,rdlx,ra(i),rb(i),vv(i));
%if abs(vv(i))<10**(-12) then vv(i)=0;
end;
=0;
rra=0;
rrb=0;
vxv=1 to ntrt;
do i=rra+sw(i)*pa(i);
rra=rrb+sw(i)*pb(i);
rrb=vxv+vv(i)*sw(i)*sw(i);
vxv
end;=rrb-rra-(z*z*vxv)**0.5; /*置信下限*/
rdx=abs(rdx-rdlx);
diffif diff<lmin then do;
=diff;
lmin=cyc;
qxq
end;if &all.=1 then output;
end;
end;
=str+qxq*poit;
rdl
/*计算置信上限*/
=0;
str=0;
qxq=1 to 3;
do qcyc
=10**(-qcyc*2);
poit=10;
dmin=str+qxq*poit*100;
str
=-99 to 99;
do cyc=str+cyc*poit;
rdlx
=1 to ntrt;
do irrv(ca(i),cb(i),na(i),nb(i),z,rdlx,ra(i),rb(i),vv(i));
%if abs(vv(i))<10**(-12) then vv(i)=0;
end;
=0;
rra=0;
rrb=0;
vxv=1 to ntrt;
do i=rra+sw(i)*pa(i);
rra=rrb+sw(i)*pb(i);
rrb=vxv+vv(i)*sw(i)*sw(i);
vxv
end;=rrb-rra+(z*z*vxv)**0.5; /*置信上限*/
rdx=abs(rdx-rdlx);
diffif diff<dmin then do;
=diff;
dmin=cyc;
qxq
end;if &all.=1 then output;
end;
end;
=str+qxq*poit;
rdu
/*Z统计量*/
=0;
rdlx
=1 to ntrt;
do irrv(ca(i),cb(i),na(i),nb(i),z,rdlx,ra(i),rb(i),vv(i));
%if abs(vv(i))<10**(-12) then vv(i)=0;
end;
=0;
rra=0;
rrb=0;
vxv=1 to ntrt;
do i=rra+sw(i)*pa(i);
rra=rrb+sw(i)*pb(i);
rrb=vxv+vv(i)*sw(i)*sw(i);
vxv
end;=-abs((rrb-rra)/vxv**0.5);
Zsc=probnorm(Zsc)*2;
Pvalue
output;
%if &all.=1 %then %do;
: rb: rdl dmin;
keep diff rdx rdlx rdl rra rrb vxv ra
%end;%else %do;
keep rdl rdu dmin lmin Zsc Pvalue rankdiff ;
%end;
run;
%end;
&out.;
data
set dat2;=rankdiff;
RateDiff=rdl;
LowerCL=rdu;
UpperCL=Zsc;
Z=Pvalue;
P
keep ratediff LowerCL UpperCL Z P;
run;
%mend;
/*dat:输入的数据集文件名,stra:分层因素,trt:分组变量名,c:事件数的变量名,n:样本量的变量名,每一层的n都不能为0*/
/*Wmethod为权重计算方法,本次分析使用Wmethod=2*/
/*Wmethod=1: Wi=n0i+n1i*/
/*Wmethod=2: Wi=1/(1/n0i+1/n1i)*/
/*当stra设为0时,为不分层的MN方法,但优先推荐使用SAS自带的procedure计算不分层的MN方法*/
逆方差法(Inverse Variance Method)计算率差及其置信区间
/* 创建原始数据集 */
data studies;
input study $ age_group $ n_treatment success_treatment n_control success_control;
datalines;
1 30-40 100 45 100 30
2 40-50 80 50 90 40
3 30-40 150 75 150 60
4 40-50 120 70 130 55
;
run;
/* 计算各组的比例差及其标准误 */
data effect_sizes;
set studies;
/* 计算治疗组和对照组的成功率 */
p_treatment = success_treatment / n_treatment;
p_control = success_control / n_control;
/* 计算比例差 */
prop_diff = p_treatment - p_control;
/* 计算标准误 */
se_prop_diff = sqrt((p_treatment * (1 - p_treatment) / n_treatment) +
(p_control * (1 - p_control) / n_control));
output; /* 输出每个研究的效应量和标准误 */
run;
/* 计算每个分层组的权重(逆方差法) */
data weighted_effects;
set effect_sizes;
weight = 1 / (se_prop_diff**2); /* 逆方差法 */
run;
/* 合成总体效应量 */
proc sql;
select
sum(weight * prop_diff) / sum(weight) as combined_prop_diff,
sqrt(1 / sum(weight)) as combined_se
into :combined_diff, :combined_se
from weighted_effects;
quit;
/* 计算95%置信区间 */
data confidence_interval;
lower_bound = &combined_diff - 1.96 * &combined_se; /* 正常置信区间计算 */
upper_bound = &combined_diff + 1.96 * &combined_se;
combined_proportion_difference = &combined_diff;
run;
data weightd_q;
set weighted_effects;
qdiff = weight * (prop_diff - &combined_diff.)**2;
run;
proc sql noprint;
select sum(qdiff) into: combined_q from weightd_q;
quit;
/* 计算P值 - 使用卡方检验 */
data p_value;
Q = &combined_q;
p_value = 1 - probchi(Q, 3); /* 双尾P值计算 */
run;
R语言计算RD, OR, RR
R语言sasLM package(https://cran.r-project.org/web/packages/sasLM/sasLM.pdf)可以用于计算率差(RD),率比(RR),优势比(OR)
library(sasLM)
###计算几何均值
geoMean(mtcars$mpg)
geoCV(mtcars$mpg)
###无连续性校正
RD(y1=55,
n1=67,
y2=43,
n2=51)
###分层的MN方法计算率差
<- c(10, 12)
y1 <- c(30, 50)
n1 <- c(43, 55)
y2 <- c(90, 80)
n2 <- data.frame(y1, n1, y2, n2)
d0 RDmn(d0, conf.level = 0.95)
###没有分层的MN方法计算率差
RDmn1(y1=55,
n1=67,
y2=43,
n2=51)
###逆方差法(inverse variance method)计算率差
RDinv(d0, conf.level = 0.95)