列联表分析

发生 未发生 合计
对照组 a b \(N_0\)
试验组 c d \(N_1\)

率比(Risk Ratio),也称相对危险度(Relative Risk): \(RR=\frac{a/N_0}{c/N_1}\),相对量,这里表示对照组发生事件是试验组发生事件的多少倍。

率差(Rate Difference): \(RD=\frac{a}{N_0}-\frac{c}{N_1}\),绝对量,率差能够直观的看出疗效的大小,可理解为试验组比对照组多百分之多少的可能发生事件。

优势比,也称比值比(Odds Ratio):\(OR=\frac{a/b}{c/d}\),这里反映了组别之间发生事件的关联强度。

率差

假设样本均值等于总体均值,其置信区间以均值为中心分布在其两侧。方法:正态近似法。

data test1;
    gp="A";r=0;n=13;output;
    gp="A";r=1;n=48;output;
    gp="B";r=0;n=8;output;
    gp="B";r=1;n=56;output;
run;


**** Method 1 ****;
proc freq data=test1;
    tables gp*r/ riskdiff(cl=exact);
    exact riskdiff;
    weight n/ zeros;
run;

假设样本均值不等于总体均值,样本出现在总体分布的置信区间内。方法:Miettinen and Nurminen.

试验组与对照组的比率不是都为100%或0%(MN方法)

data test1;
    gp="A";r=0;n=13;output;
    gp="A";r=1;n=48;output;
    gp="B";r=0;n=8;output;
    gp="B";r=1;n=56;output;
run;


**** Method 1 ****;
proc freq data=test1;
    tables gp*r/ riskdiff(cl=mn);
    weight n/ zeros;
    ods output pdiffcls=mn;
run;

试验组与对照组的比率都为100%或0%(MN方法)

%macro ratediff(n1=,n1_event=,n2=,n2_event=,alpha=,side=);
data mienur;
n1=&n1.;
a1=&n1_event.;
a2=n1-a1;
p1=a1/n1;
n2=&n2.;
a3=&n2_event.;
a4=n2-a3;
p2=a3/n2;
z=probit(1-&alpha./&side.);
d=p1-p2;
**率差置信区间下限;
**率差置信区间上限;
l_diff=-(z**2*(a1+a3)/((a1+a3-1)*a1))/((z**2*(a1+a3)/((a1+a3-1)*a1))+1);
u_diff= (z**2*(a1+a3)/((a1+a3-1)*a3))/((z**2*(a1+a3)/((a1+a3-1)*a3))+1);
run;
%mend ratediff;

%ratediff(n1=100,n1_event=100,n2=100,n2_event=100,alpha=0.05,side=2);

分层的Miettinen-Nurminen方法计算率差与P值

%macro rrv(c0,c1,s0,s1,z,rd,ra,rb,vv);/*根据rd计算r0和r1*/
 c0=&c0.;
 c1=&c1.;
 s0=&s0.;
 s1=&s1.;
 c=c1+c0;
 s=s1+s0;
 p1=c1/s1;
 p0=c0/s0;
 z2=&z.**2;
 rd=&rd.;

 l3=s;
 l2=(s1+2*s0)*rd-s-c;
 l1=(s0*rd-s-2*c0)*rd+c;
 l0=c0*rd*(1-rd);

 q=l2**3/(3*l3)**3-l1*l2/(6*l3**2)+l0/(2*l3);
 p=sign(q)*(l2**2/(3*l3)**2-l1/(3*l3))**0.5;
 if p=0 then aa=0; 
 else aa=q/p**3;
 if abs(aa)>1 then aa=1;
 a=1/3*(pi+arcos(aa));
 r0=2*p*cos(a)-l2/(3*l3);

 r1=r0+rd;
 v=(r1*(1-r1)/s1+r0*(1-r0)/s0)*s/(s-1);
 &ra.=r0;
 &rb.=r1;
 &vv.=v;
%mend;

%macro stra_mn(dat=,stra=,trt=,c=,n=,alp=,Wmethod=2,all=0,out=);

%if &stra.=0 %then %do;

proc sort data=&dat. out=dat1;
 by &trt.;
run;

data dat1x;
 set dat1 nobs=nob;
 by &trt.;
 array ccc(2);
 array nnn(2);
 retain ccc: nnn:;
 if first.&trt. then do;
    i+1;
    ccc(i)=&c.;
    nnn(i)=&n.;
 end;
 if _N_=nob then do;
    ca=ccc(1);
    cb=ccc(2);
    na=nnn(1);
    nb=nnn(2);
    output;
 end;
 keep ca cb na nb;
run;

data dat2;
 set dat1x;
 z=probit(1-&alp./2);
 pi=constant("pi");
 pa=ca/na;
 pb=cb/nb;

/*计算置信下限*/

 str=0;
 qxq=0;
 do qcyc=1 to 3;

    poit=10**(-qcyc*2);
    lmin=10;
    str=str+qxq*poit*100;

 do cyc=-99 to 99;
    rdlx=str+cyc*poit;
    %rrv(ca,cb,na,nb,z,rdlx,ra,rb,vv);
    if abs(vv)<10**(-12) then vv=0;
    rdx=pb-pa-(z*z*vv)**0.5; /*置信下限*/
    if vv^=0 then diff=abs(rdx-rdlx);
    if diff<lmin then do;
       lmin=diff;
       qxq=cyc;
    end;
    if &all.=1 then output;
 end; 

 end;
 rdl=str+qxq*poit;

 /*计算置信上限*/
 str=0;
 qxq=0;
 do qcyc=1 to 3;

    poit=10**(-qcyc*2);
    dmin=10;
    str=str+qxq*poit*100;

 do cyc=-99 to 99;
    rdlx=str+cyc*poit;
    %rrv(ca,cb,na,nb,z,rdlx,ra,rb,vv);
    if abs(vv)<10**(-12) then vv=0;
    rdx=pb-pa+(z*z*vv)**0.5; /*置信上限*/
    if vv^=0 then diff=abs(rdx-rdlx);
    if diff<dmin then do;
       dmin=diff;
       qxq=cyc;
    end;
    if &all.=1 then output;
 end; 

 end;
 rdu=str+qxq*poit;
 
/*Z统计量*/

 rdlx=0;
 %rrv(ca,cb,na,nb,z,rdlx,ra,rb,vv);
 if abs(vv)<10**(-16) then Zsc=0;
 else Zsc=-abs((pb-pa)/vv**0.5);
 Pvalue=probnorm(Zsc)*2;

 /*率差*/
 rankdiff=pb-pa;
 output;

 %if &all.=1 %then %do;
     keep diff rdx rdlx rdl ra rb vv rdl dmin rdu lmin str qxq;
 %end;
 %else %do;
     keep rdl rdu dmin lmin Zsc Pvalue rankdiff ;
 %end;
run;

%end;



/**************************************************/
/*分层MN*/
%else %do;

proc sort data=&dat. out=dat1;
 by &stra. &trt.;
run;

data _NULL_;
 set dat1 nobs=nob;
 array cc(99);
 array nn(99);
 retain cc: nn:;
 by &stra. &trt.;
 x+1;
 cc(x)=&c.;
 nn(x)=&n.;
 if _N_=nob then do;
    grp=x/2;
    do i=1 to grp;
       call symputx("c"||strip(put(i,best.))||"_0",strip(put(cc(i*2-1),best.)));
       call symputx("c"||strip(put(i,best.))||"_1",strip(put(cc(i*2),best.)));
       call symputx("n"||strip(put(i,best.))||"_0",strip(put(nn(i*2-1),best.)));
       call symputx("n"||strip(put(i,best.))||"_1",strip(put(nn(i*2),best.)));
    end;
    call symputx("Ntrt",strip(put(x/2,best.)));
 end;
run;

data dat2;
 array ca(&Ntrt.);
 array cb(&Ntrt.);
 array na(&Ntrt.);
 array nb(&Ntrt.);
 array pa(&Ntrt.);
 array pb(&Ntrt.);
 array ra(&Ntrt.);
 array rb(&Ntrt.);
 array vv(&Ntrt.);
 array wfr(&Ntrt.);
 array sw(&Ntrt.);
 ntrt=&Ntrt.;
 %do i=1 %to &Ntrt.;
     ca&i.=&&c&i._0.;
     cb&i.=&&c&i._1.;
     na&i.=&&n&i._0.;
     nb&i.=&&n&i._1.;
 %end;
 z=probit(1-&alp./2);
 pi=constant("pi");
 do i=1 to ntrt;
    pa(i)=ca(i)/na(i);
    pb(i)=cb(i)/nb(i);
 end;

 /*率差*/
 ew=0;
 do i=1 to ntrt;  
    %if &wmethod.=1 %then %do; 
        wfr(i)=(na(i)+nb(i));
    %end;
    %if &wmethod.=2 %then %do; 
        wfr(i)=1/(1/na(i)+1/nb(i));
    %end;
    ew=ew+wfr(i);
 end;
 rra=0;
 rrb=0;
 do i=1 to ntrt;
    sw(i)=wfr(i)/ew;
    rra=rra+sw(i)*pa(i);
    rrb=rrb+sw(i)*pb(i);
 end;
 rankdiff=rrb-rra;

 /*计算置信下限*/
 str=0;
 qxq=0;
 do qcyc=1 to 3;

    poit=10**(-qcyc*2);
    lmin=10;
    str=str+qxq*poit*100;

 do cyc=-99 to 99;
    rdlx=str+cyc*poit;
  
    do i=1 to ntrt;
       %rrv(ca(i),cb(i),na(i),nb(i),z,rdlx,ra(i),rb(i),vv(i));
       if abs(vv(i))<10**(-12) then vv(i)=0;
    end;

    rra=0;
    rrb=0;
    vxv=0;
    do i=1 to ntrt;
       rra=rra+sw(i)*pa(i);
       rrb=rrb+sw(i)*pb(i);
       vxv=vxv+vv(i)*sw(i)*sw(i);
    end;
    rdx=rrb-rra-(z*z*vxv)**0.5; /*置信下限*/
    diff=abs(rdx-rdlx);
    if diff<lmin then do;
       lmin=diff;
       qxq=cyc;
    end;
    if &all.=1 then output;
 end; 

 end;

 rdl=str+qxq*poit;


 /*计算置信上限*/
 str=0;
 qxq=0;
 do qcyc=1 to 3;

    poit=10**(-qcyc*2);
    dmin=10;
    str=str+qxq*poit*100;

 do cyc=-99 to 99;
    rdlx=str+cyc*poit;
  
    do i=1 to ntrt;
       %rrv(ca(i),cb(i),na(i),nb(i),z,rdlx,ra(i),rb(i),vv(i));
       if abs(vv(i))<10**(-12) then vv(i)=0;
    end;

    rra=0;
    rrb=0;
    vxv=0;
    do i=1 to ntrt;
       rra=rra+sw(i)*pa(i);
       rrb=rrb+sw(i)*pb(i);
       vxv=vxv+vv(i)*sw(i)*sw(i);
    end;
    rdx=rrb-rra+(z*z*vxv)**0.5; /*置信上限*/
    diff=abs(rdx-rdlx);
    if diff<dmin then do;
       dmin=diff;
       qxq=cyc;
    end;
    if &all.=1 then output;
 end; 

 end;

 rdu=str+qxq*poit;
 
/*Z统计量*/

 rdlx=0;
  
 do i=1 to ntrt;
    %rrv(ca(i),cb(i),na(i),nb(i),z,rdlx,ra(i),rb(i),vv(i));
    if abs(vv(i))<10**(-12) then vv(i)=0;
 end;

 rra=0;
 rrb=0;
 vxv=0;
 do i=1 to ntrt;
    rra=rra+sw(i)*pa(i);
    rrb=rrb+sw(i)*pb(i);
    vxv=vxv+vv(i)*sw(i)*sw(i);
 end;
 Zsc=-abs((rrb-rra)/vxv**0.5);
 Pvalue=probnorm(Zsc)*2;

 output;


 %if &all.=1 %then %do;
     keep diff rdx rdlx rdl rra rrb vxv ra: rb: rdl dmin;
 %end;
 %else %do;
     keep rdl rdu dmin lmin Zsc Pvalue rankdiff ;
 %end;
run;

%end;

data &out.;
 set dat2;
 RateDiff=rankdiff;
 LowerCL=rdl;
 UpperCL=rdu;
 Z=Zsc;
 P=Pvalue;
 keep ratediff LowerCL UpperCL Z P;
run;

%mend;


/*dat:输入的数据集文件名,stra:分层因素,trt:分组变量名,c:事件数的变量名,n:样本量的变量名,每一层的n都不能为0*/
/*Wmethod为权重计算方法,本次分析使用Wmethod=2*/
/*Wmethod=1: Wi=n0i+n1i*/
/*Wmethod=2: Wi=1/(1/n0i+1/n1i)*/

/*当stra设为0时,为不分层的MN方法,但优先推荐使用SAS自带的procedure计算不分层的MN方法*/

逆方差法(Inverse Variance Method)计算率差及其置信区间


/* 创建原始数据集 */
data studies;
    input study $ age_group $ n_treatment success_treatment n_control success_control;
    datalines;
1 30-40 100 45 100 30
2 40-50 80 50 90 40
3 30-40 150 75 150 60
4 40-50 120 70 130 55
;
run;

/* 计算各组的比例差及其标准误 */
data effect_sizes;
    set studies;

    /* 计算治疗组和对照组的成功率 */
    p_treatment = success_treatment / n_treatment;
    p_control = success_control / n_control;

    /* 计算比例差 */
    prop_diff = p_treatment - p_control;

    /* 计算标准误 */
    se_prop_diff = sqrt((p_treatment * (1 - p_treatment) / n_treatment) + 
                         (p_control * (1 - p_control) / n_control));

    output; /* 输出每个研究的效应量和标准误 */
run;

/* 计算每个分层组的权重(逆方差法) */
data weighted_effects;
    set effect_sizes;
    weight = 1 / (se_prop_diff**2); /* 逆方差法 */
run;

/* 合成总体效应量 */
proc sql;
    select 
        sum(weight * prop_diff) / sum(weight) as combined_prop_diff,
        sqrt(1 / sum(weight)) as combined_se
    into :combined_diff, :combined_se
    from weighted_effects;
quit;

/* 计算95%置信区间 */
data confidence_interval;
    lower_bound = &combined_diff - 1.96 * &combined_se; /* 正常置信区间计算 */
    upper_bound = &combined_diff + 1.96 * &combined_se;
    combined_proportion_difference = &combined_diff;
run;

data weightd_q;
    set weighted_effects;
    qdiff = weight * (prop_diff - &combined_diff.)**2;
run;

proc sql noprint;
    select sum(qdiff) into: combined_q from weightd_q;
quit;


/* 计算P值 - 使用卡方检验 */
data p_value;
    Q = &combined_q; 
    p_value = 1 - probchi(Q, 3); /* 双尾P值计算 */
run;

R语言计算RD, OR, RR

R语言sasLM package(https://cran.r-project.org/web/packages/sasLM/sasLM.pdf)可以用于计算率差(RD),率比(RR),优势比(OR)

library(sasLM)

###计算几何均值
geoMean(mtcars$mpg)
geoCV(mtcars$mpg)

###无连续性校正
RD(y1=55,
   n1=67,
   y2=43,
   n2=51)

###分层的MN方法计算率差
y1 <- c(10, 12)
n1 <- c(30, 50)
y2 <- c(43, 55)
n2 <- c(90, 80)
d0 <- data.frame(y1, n1, y2, n2)
RDmn(d0, conf.level = 0.95)

###没有分层的MN方法计算率差
RDmn1(y1=55,
   n1=67,
   y2=43,
   n2=51)

###逆方差法(inverse variance method)计算率差
RDinv(d0, conf.level = 0.95)