/********************************************/
/* Program m9_4_4                           */
/* Cluster Analysis CEDAR Project           */
/* Symptom Data Set                         */
/*                                          */
/* ALL Data                                 */
/********************************************/
%include 'e:xmacro.sas';
%include 'e:distnew.sas';

/* Global listing settings: 80-column lines, no date stamp, page      */
/* numbers starting at 1, and 'o' as the FORMDLIM= delimiter string.  */
options linesize=80
        nodate
        pageno=1
        formdlim='o';

/* First title line, shown on the output of every step below. */
title1 'Cluster Analysis CEDAR Symptom Data---ALL';

/*--------------------------------------------------------------------*/
/* %readsymp: read one raw symptom file into a SAS data set.          */
/*                                                                    */
/*   file=   path of the raw data file to read                        */
/*   out=    name of the data set to create                           */
/*   member= one-character role code stored in MEMBER (c, m or f)     */
/*                                                                    */
/* Every file has the same layout: subject id, group, 19 symptom      */
/* items (alc*, can*, tob*), then age, freq, famid and sud.           */
/* Records whose symptom total is exactly 0 are dropped.              */
/*--------------------------------------------------------------------*/
%macro readsymp(file=, out=, member=);
data &out;
     infile "&file";
     input subject $ group alc1 alc2 alc3 alc4 alc5 alc6 can1 can2
           can3 can4 can5 can6 can7 tob1 tob3 tob4 tob7 tob8 tob9
           age freq famid sud;
     /* '+' propagates missing values: TOTAL is missing when any item */
     /* is missing, and such records are NOT deleted below.           */
     total = alc1+alc2+alc3+alc4+alc5+alc6+can1+can2+
             can3+can4+can5+can6+can7+tob1+tob3+tob4+tob7+tob8+tob9;
     if total = 0 then delete;
     /* Label every record that is kept.  The previous                */
     /* "if total>0 then member=..." left MEMBER blank on records     */
     /* with a missing TOTAL.                                         */
     member = "&member";
run;
%mend readsymp;

%readsymp(file=e:child.dat,  out=Sympc, member=c);
%readsymp(file=e:mother.dat, out=Sympm, member=m);
%readsymp(file=e:father.dat, out=Sympf, member=f);

/* Stack children, fathers and mothers into one analysis data set. */
data all;
     set sympc sympf sympm;
run;

/* Later steps merge cluster assignments back onto ALL by SUBJECT. */
proc sort data=all;
     by subject;
run;
/* Build a square distance matrix (method=djaccard) over the 19       */
/* symptom items with the %distance macro; result goes to DISTJACC.   */
/* NOTE(review): %distance is not defined in this file - presumably   */
/* it comes from one of the %include'd files; confirm.                */
%distance(data=all, id=subject, options=nomiss, out=distjacc,
          shape=square, method=djaccard,
          var=alc1 alc2 alc3 alc4 alc5 alc6 can1 can2
              can3 can4 can5 can6 can7 tob1 tob3 tob4 tob7 tob8 tob9);

/* Ward's-method hierarchical clustering on the distance columns.     */
/* NOTE(review): the VAR range is hard-coded to two specific column   */
/* names of DISTJACC (presumably derived from the first and last      */
/* subject ids); it must be updated if those subjects change.         */
proc cluster data=distjacc
             method=ward
             outtree=tree
             noprint;
     var _004701BP--_301703BI;
     id subject;
run;

/* Cut the tree into 3 clusters; assignments are written to JAR. */
title2 'Jaccard Coefficient Wards Method';
proc tree data=tree
          out=jar
          nclusters=3;
     id subject;
run;

/* Attach the cluster assignments to the subject-level data. */
proc sort data=jar;
     by subject;
run;

data new;
     merge all jar;
     by subject;
run;

/* List the subjects cluster by cluster. */
proc sort data=new;
     by cluster;
run;

title2 'Jaccard Coefficient -- Wards Method';
proc print data=new;
     var subject group sud age freq member;
     by cluster;
run;
/* Non-hierarchical clustering of the raw symptom items with PROC     */
/* FASTCLUS (3 clusters, at most 100 iterations).  OUT=HIER holds the */
/* input data plus the cluster assignment.                            */
title2 'Non-hierarchical Method-- all';
proc fastclus data=all out=hier maxclusters=3 maxiter=100;
     id subject;
     var alc1 alc2 alc3 alc4 alc5 alc6 can1 can2
         can3 can4 can5 can6 can7 tob1 tob3 tob4 tob7 tob8 tob9;
run;

/* Sort the FASTCLUS result so it can be merged by SUBJECT. */
proc sort data=hier;
     by subject;
run;

/* Merge the FASTCLUS assignments (HIER) onto the subject data.       */
/* The earlier code merged JAR here - the Ward/PROC TREE result - so  */
/* the listing under this title repeated the hierarchical clusters.   */
data new;
     merge all hier;
     by subject;
run;

/* List the subjects cluster by cluster. */
proc sort data=new;
     by cluster;
run;

proc print data=new;
     by cluster;
     var subject group sud age freq member;
run;
/* Canonical discriminant analysis of the FASTCLUS clusters (HIER),   */
/* scores written to CAN.                                             */
/* PREFIX=cv names the canonical score variables cv1, cv2, ... so     */
/* they cannot collide with the input symptom variables can1-can7,    */
/* which share the procedure's default Can<n> naming.                 */
/* NOTE(review): the original plot used can12*can22, presumably the   */
/* names SAS generated for the first and second canonical variables   */
/* after that collision - confirm the axes below match the intent.    */
proc candisc data=hier out=can prefix=cv;
    var alc1 alc2 alc3 alc4 alc5 alc6 can1 can2
        can3 can4 can5 can6 can7 tob1 tob3 tob4 tob7 tob8 tob9;
    class cluster;
run;

/* Scatter plot of the first two canonical variables, one plotting    */
/* symbol and colour per cluster.                                     */
proc gplot data=can;
    plot cv1*cv2=cluster;
    symbol1  v=triangle c=red;
    symbol2  v=dot c=green;
    symbol3  v=circle c=blue;
run;
quit;

