/* Interactive (Display Manager) sessions: empty the Output and Log windows first. */
DM "output;clear;log;clear";

/*
 * Listing layout: 55 lines by 80 columns per page, page numbers restarted
 * at 1, no date stamp, macro-generated statements echoed to the log, and
 * plain keyboard characters for the table rules and corners.
 */
Options PageSize=55 LineSize=80 PageNo=1 NoDate MPrint
        FormChar='|----|+|---+=|-/\<>*';

/* Graphics: reset everything, then unit-height Swiss text for titles and labels. */
GOptions Reset=All HTitle=1 HText=1 FTitle=Swiss FText=Swiss NoPrompt;
/*
* PDF code
*
ODS Listing Close;
Filename GSASFile Dummy;
GOptions Device=PDF FText=Helvetica FTitle=Helvetica;
Options TopMargin=1 BottomMargin=1 LeftMargin=1.0 RightMargin=1.0;
ODS PDF File="BreakfastCereals.pdf";
*/
/*
* HTML code
* Remove length specifications in axes statements.
*
ODS Listing Close;
ODS HTML body= "BreakfastCereals.html"
headtext="<title>Cluster Analysis of Cereals</title>"
gpath="BCereal"
anchor="BCereal";
GOptions Device=GIF Transparency NoBorder
HText=1 FText=Swiss HTitle=1 FTitle=Swiss;
*/
/* Single quotes stop the macro processor from trying to resolve "&W". */
Title1 'Breakfast Cereals - J&W Table 11.9';

/*
 * Read one record per cereal: brand name (no blanks), one-letter
 * manufacturer code, eight nutrient measurements, and a numeric group code.
 */
Data Cereals;
   Length Brand $23 Label $4 Manufacturer $1;
   Input Brand Manufacturer
         Calories Protein Fat Sodium Fiber Carbohydrates Sugar Potassium
         Group;

   /*
    * Short identifying label (at most 4 characters): strip the lowercase
    * letters from the brand name, e.g. "RaisinBran" becomes "RB".
    * A one-word brand would shrink to a single character, so its label is
    * the first four characters of the name instead.  CocoaPuffs and
    * CornPops both reduce to "CP"; CocoaPuffs is relabelled "Coco".
    */
   Label = Compress(Brand, "abcdefghijklmnopqrstuvwxyz");
   If Length(Label) = 1 Then Label = Substr(Brand, 1, 4);
   Else If Label = "CP" And Brand = "CocoaPuffs" Then Label = "Coco";

   /*
    * NOTE(review): the brand spelled "Cheaties" in the data lines (between
    * Trix and WheatiesHoneyGold) may be a typo for "Wheaties" -- confirm
    * against the source table before changing it; the derived label would
    * change from "Chea" to "Whea".
    */
   Datalines;
ACCheerios G 110 2 2 180 1.5 10.5 10 70 1
Cheerios G 110 6 2 290 2.0 17.0 1 105 1
CocoaPuffs G 110 1 1 180 0.0 12.0 13 55 1
CountChocula G 110 1 1 180 0.0 12.0 13 65 1
GoldenGrahams G 110 1 1 280 0.0 15.0 9 45 1
HoneyNutCheerios G 110 3 1 250 1.5 11.5 10 90 1
Kix G 110 2 1 260 0.0 21.0 3 40 1
LuckyCharms G 110 2 1 180 0.0 12.0 12 55 1
MultiGrainCheerios G 100 2 1 220 2.0 15.0 6 90 1
OatmealRaisinCrisp G 130 3 2 170 1.5 13.5 10 120 1
RaisinNutBran G 100 3 2 140 2.5 10.5 8 140 1
TotalCornFlakes G 110 2 1 200 0.0 21.0 3 35 1
TotalRaisinBran G 140 3 1 190 4.0 15.0 14 230 1
TotalWholeGrain G 100 3 1 200 3.0 16.0 3 110 1
Trix G 110 1 1 140 0.0 13.0 12 25 1
Cheaties G 100 3 1 200 3.0 17.0 3 110 1
WheatiesHoneyGold G 110 2 1 200 1.0 16.0 8 60 1
AllBran K 70 4 1 260 9.0 7.0 5 320 2
AppleJacks K 110 2 0 125 1.0 11.0 14 30 2
CornFlakes K 100 2 0 290 1.0 21.0 2 35 2
CornPops K 110 1 0 90 1.0 13.0 12 20 2
CracklinOatBran K 110 3 3 140 4.0 10.0 7 160 2
Crispix K 110 2 0 220 1.0 21.0 3 30 2
FrootLoops K 110 2 1 125 1.0 11.0 13 30 2
FrostedFlakes K 110 1 0 200 1.0 14.0 11 25 2
FrostedMiniWheats K 100 3 0 0 3.0 14.0 7 100 2
FruitfulBran K 120 3 0 240 5.0 14.0 12 190 2
JustRightCrunchyNuggets K 110 2 1 170 1.0 17.0 6 60 2
MueslixCrispyBlend K 160 3 2 150 3.0 17.0 13 160 2
NutNHoneyCrunch K 120 2 1 190 0.0 15.0 9 40 2
NutriGrainAlmondRaisin K 140 3 2 220 3.0 21.0 7 130 2
NutriGrainWheat K 90 3 0 170 3.0 18.0 2 90 2
Product19 K 100 3 0 320 1.0 20.0 3 45 2
RaisinBran K 120 3 1 210 5.0 14.0 12 240 2
RiceKrispies K 110 2 0 290 0.0 22.0 3 35 2
Smacks K 110 2 1 70 1.0 9.0 15 40 2
SpecialK K 110 6 0 230 1.0 16.0 3 55 2
CapNCrunch Q 120 1 2 220 0.0 12.0 12 35 3
HoneyGrahamOhs Q 120 1 2 220 1.0 12.0 11 45 3
Life Q 100 4 2 150 2.0 12.0 6 95 3
PuffedRice Q 50 1 0 0 0.0 13.0 0 15 3
PuffedWheat Q 50 2 0 0 1.0 10.0 0 50 3
QuakerOatmeal Q 100 5 2 0 2.7 1.0 1 110 3
;
/*
 * List the raw data.  The abbreviated column headings (Na, K, Carbs) are
 * there so that each record fits on a single listing line.
 */
Proc Print Data=Cereals Label;
   Label Carbohydrates="Carbs" Sodium="Na" Potassium="K";
   Var Brand Label
       Calories Protein Fat Sodium Fiber Carbohydrates Sugar Potassium;
Run;
/* Mean, standard deviation, minimum and maximum of the eight nutrient variables. */
Title2 "Summary Statistics";
Proc Means Data=Cereals Mean Std Min Max;
   Var Calories Protein Fat Sodium
       Fiber Carbohydrates Sugar Potassium;
Run;
/*
 * Standardize the variables before clustering.  The nutrients are recorded
 * in different units (g, mg, KCal), so distances computed on the raw values
 * would not weight the variables equally.  Rescaling every variable to
 * mean 0 and standard deviation 1 measures all differences in
 * standard-deviation units and so gives each variable equal weight.
 * The standardized copy is written to STDCEREALS; CEREALS is left as read.
 */
Proc Standard Data=Cereals Mean=0 Std=1 Out=StdCereals;
   Var Calories Protein Fat Sodium
       Fiber Carbohydrates Sugar Potassium;
Run;
/*
 * Four hierarchical cluster analyses follow: single, average, and complete
 * linkage, and finally Ward's method.  This is the single-linkage run.
 * The tree is saved in TREEDATA, then the merge height is plotted against
 * the number of clusters and the dendrogram is drawn.
 */
Title2 "Single Linkage Cluster Analysis";
Proc Cluster Data=StdCereals Method=Single OutTree=TreeData;
   Id Label;
   Var Calories Protein Fat Sodium
       Fiber Carbohydrates Sugar Potassium;
Run;

/*
 * Clear earlier Symbol/Axis definitions, then define the ones used by both
 * the height plot and the dendrogram: a vertical-axis label rotated 90
 * degrees and black dots joined by a spline (points sorted first).
 */
GOptions Reset=Symbol Reset=Axis;
Axis1 Label=(A=90);
Symbol1 C=Black V=Dot I=SplineS;

Proc GPlot Data=TreeData;
   Plot _HEIGHT_*_NCL_=1 / VAxis=Axis1;
Run;
Quit;

Proc Tree Data=TreeData VAxis=Axis1;
   Id Label;
Run;
/*
 * Average-linkage run on the standardized data.  TREEDATA is overwritten
 * with this tree before the height plot and the dendrogram are produced.
 */
Title2 "Average Linkage Cluster Analysis";
Proc Cluster Data=StdCereals Method=Average OutTree=TreeData;
   Id Label;
   Var Calories Protein Fat Sodium
       Fiber Carbohydrates Sugar Potassium;
Run;

/* Fresh Symbol/Axis definitions shared by the plot and the dendrogram. */
GOptions Reset=Symbol Reset=Axis;
Axis1 Label=(A=90);
Symbol1 C=Black V=Dot I=SplineS;

Proc GPlot Data=TreeData;
   Plot _HEIGHT_*_NCL_=1 / VAxis=Axis1;
Run;
Quit;

Proc Tree Data=TreeData VAxis=Axis1;
   Id Label;
Run;
/*
 * Complete-linkage run on the standardized data.  TREEDATA is overwritten
 * with this tree before the height plot and the dendrogram are produced.
 */
Title2 "Complete Linkage Cluster Analysis";
Proc Cluster Data=StdCereals Method=Complete OutTree=TreeData;
   Id Label;
   Var Calories Protein Fat Sodium
       Fiber Carbohydrates Sugar Potassium;
Run;

/* Fresh Symbol/Axis definitions shared by the plot and the dendrogram. */
GOptions Reset=Symbol Reset=Axis;
Axis1 Label=(A=90);
Symbol1 C=Black V=Dot I=SplineS;

Proc GPlot Data=TreeData;
   Plot _HEIGHT_*_NCL_=1 / VAxis=Axis1;
Run;
Quit;

Proc Tree Data=TreeData VAxis=Axis1;
   Id Label;
Run;
/*
 * Ward's-method run on the standardized data.  Unlike the three linkage
 * runs, the dendrogram step here also cuts the tree at three clusters and
 * saves the assignments in CLUSTERS for the merge and plots further on.
 */
Title2 "Ward's Method Cluster Analysis";
Proc Cluster Data=StdCereals Method=Ward OutTree=TreeData;
   Id Label;
   Var Calories Protein Fat Sodium
       Fiber Carbohydrates Sugar Potassium;
Run;

/* Fresh Symbol/Axis definitions shared by the plot and the dendrogram. */
GOptions Reset=Symbol Reset=Axis;
Axis1 Label=(A=90);
Symbol1 C=Black V=Dot I=SplineS;

Proc GPlot Data=TreeData;
   Plot _HEIGHT_*_NCL_=1 / VAxis=Axis1;
Run;
Quit;

/* NClusters=3 requests the three-cluster solution; Out= stores it by LABEL. */
Proc Tree Data=TreeData NClusters=3 Out=Clusters VAxis=Axis1;
   Id Label;
Run;
/*
 * Attach each cereal's cluster assignment to its original measurements.
 * LABEL is present in both data sets and is unique to each cereal, so
 * both are sorted by LABEL and then match-merged on it.
 */
Proc Sort Data=Clusters;
   By Label;
Run;

Proc Sort Data=Cereals;
   By Label;
Run;

Data CerealClusters;
   Length ClusterC $1;               /* cluster number as a 1-character string */
   Merge Cereals
         Clusters (Drop=ClusName);   /* cluster-name variable is not kept */
   By Label;
   ClusterC = Put(Cluster, 1.);
Run;
/*
 * Report the cereals cluster by cluster, alphabetically by brand within
 * each cluster.
 */
Proc Sort Data=CerealClusters;
   By Cluster Brand;
Run;

Title3 "Cereals Assigned To One of Three Clusters";
Proc Print Data=CerealClusters Label;
   /* Abbreviated headings keep each record on a single listing line. */
   Label Cluster="Cl#" Calories="Cal" Carbohydrates="Carbs"
         Sodium="Na" Potassium="K";
   Var Cluster Brand Label
       Calories Protein Fat Sodium Fiber Carbohydrates Sugar Potassium;
Run;
Title3;   /* clear the third title line */
/*
 * A graphical check on the clusters when the variables are continuous:
 * run a principal components analysis, plot the observations in the space
 * of the leading components, and mark each point with the cluster it was
 * assigned to.  The clustering used standardized variables, so the PCA
 * should be based on the correlation matrix; no covariance option is
 * requested here.  Component scores (Prin1, Prin2, ...) are added to the
 * merged data and saved in COMPS.
 */
Title2 "Principal Components Analysis";
Proc Princomp Data=CerealClusters Out=Comps;
   Var Calories Protein Fat Sodium
       Fiber Carbohydrates Sugar Potassium;
Run;
Title3 "Symbols and Colors Indicate Cluster Membership (Ward's Method)";

/*
 * Component 2 against component 1.  Both axes run from -4 to 6 and are
 * 5.75in long.  (The commented-out HTML setup at the top of the program
 * says to remove the Length= specifications for HTML output.)
 */
Axis1 Order=(-4 To 6 By 1) Length=5.75in
      Label=(A=90 "Principal Component 2");
Axis2 Order=(-4 To 6 By 1) Length=5.75in
      Label=("Principal Component 1");

/* One symbol and color per cluster; each point is tagged with its cereal label. */
Symbol1 I=None V=Circle   C=Black Pointlabel=(C=Black H=0.75 "#Label");
Symbol2 I=None V=Triangle C=Blue  Pointlabel=(C=Blue H=0.75 "#Label");
Symbol3 I=None V=Square   C=Red   Pointlabel=(C=Red H=0.75 "#Label");

Proc GPlot Data=Comps;
   Plot Prin2*Prin1=Cluster / HAxis=Axis2 VAxis=Axis1;
Run;
Quit;
/*
 * Component 3 against component 1, on the same -4 to 6 scales, with the
 * same cluster symbols and point labels.
 */
Axis1 Order=(-4 To 6 By 1) Length=5.75in
      Label=(A=90 "Principal Component 3");
Axis2 Order=(-4 To 6 By 1) Length=5.75in
      Label=("Principal Component 1");

Symbol1 I=None V=Circle   C=Black Pointlabel=(C=Black H=0.75 "#Label");
Symbol2 I=None V=Triangle C=Blue  Pointlabel=(C=Blue H=0.75 "#Label");
Symbol3 I=None V=Square   C=Red   Pointlabel=(C=Red H=0.75 "#Label");

Proc GPlot Data=Comps;
   Plot Prin3*Prin1=Cluster / HAxis=Axis2 VAxis=Axis1;
Run;
Quit;
/*
 * Component 3 against component 2, on the same -4 to 6 scales, with the
 * same cluster symbols and point labels.
 */
Axis1 Order=(-4 To 6 By 1) Length=5.75in
      Label=(A=90 "Principal Component 3");
Axis2 Order=(-4 To 6 By 1) Length=5.75in
      Label=("Principal Component 2");

Symbol1 I=None V=Circle   C=Black Pointlabel=(C=Black H=0.75 "#Label");
Symbol2 I=None V=Triangle C=Blue  Pointlabel=(C=Blue H=0.75 "#Label");
Symbol3 I=None V=Square   C=Red   Pointlabel=(C=Red H=0.75 "#Label");

Proc GPlot Data=Comps;
   Plot Prin3*Prin2=Cluster / HAxis=Axis2 VAxis=Axis1;
Run;
Quit;
Title3;   /* clear the third title line */
*ODS PDF Close;
*ODS HTML Close;
*ODS Listing;
| Breakfast Cereals - J&W Table 11.9 |
| Obs | Brand | Label | Calories | Protein | Fat | Na | Fiber | Carbs | Sugar | K |
| 1 | ACCheerios | ACC | 110 | 2 | 2 | 180 | 1.5 | 10.5 | 10 | 70 |
| 2 | Cheerios | Chee | 110 | 6 | 2 | 290 | 2.0 | 17.0 | 1 | 105 |
| 3 | CocoaPuffs | Coco | 110 | 1 | 1 | 180 | 0.0 | 12.0 | 13 | 55 |
| 4 | CountChocula | CC | 110 | 1 | 1 | 180 | 0.0 | 12.0 | 13 | 65 |
| 5 | GoldenGrahams | GG | 110 | 1 | 1 | 280 | 0.0 | 15.0 | 9 | 45 |
| 6 | HoneyNutCheerios | HNC | 110 | 3 | 1 | 250 | 1.5 | 11.5 | 10 | 90 |
| 7 | Kix | Kix | 110 | 2 | 1 | 260 | 0.0 | 21.0 | 3 | 40 |
| 8 | LuckyCharms | LC | 110 | 2 | 1 | 180 | 0.0 | 12.0 | 12 | 55 |
| 9 | MultiGrainCheerios | MGC | 100 | 2 | 1 | 220 | 2.0 | 15.0 | 6 | 90 |
| 10 | OatmealRaisinCrisp | ORC | 130 | 3 | 2 | 170 | 1.5 | 13.5 | 10 | 120 |
| 11 | RaisinNutBran | RNB | 100 | 3 | 2 | 140 | 2.5 | 10.5 | 8 | 140 |
| 12 | TotalCornFlakes | TCF | 110 | 2 | 1 | 200 | 0.0 | 21.0 | 3 | 35 |
| 13 | TotalRaisinBran | TRB | 140 | 3 | 1 | 190 | 4.0 | 15.0 | 14 | 230 |
| 14 | TotalWholeGrain | TWG | 100 | 3 | 1 | 200 | 3.0 | 16.0 | 3 | 110 |
| 15 | Trix | Trix | 110 | 1 | 1 | 140 | 0.0 | 13.0 | 12 | 25 |
| 16 | Cheaties | Chea | 100 | 3 | 1 | 200 | 3.0 | 17.0 | 3 | 110 |
| 17 | WheatiesHoneyGold | WHG | 110 | 2 | 1 | 200 | 1.0 | 16.0 | 8 | 60 |
| 18 | AllBran | AB | 70 | 4 | 1 | 260 | 9.0 | 7.0 | 5 | 320 |
| 19 | AppleJacks | AJ | 110 | 2 | 0 | 125 | 1.0 | 11.0 | 14 | 30 |
| 20 | CornFlakes | CF | 100 | 2 | 0 | 290 | 1.0 | 21.0 | 2 | 35 |
| 21 | CornPops | CP | 110 | 1 | 0 | 90 | 1.0 | 13.0 | 12 | 20 |
| 22 | CracklinOatBran | COB | 110 | 3 | 3 | 140 | 4.0 | 10.0 | 7 | 160 |
| 23 | Crispix | Cris | 110 | 2 | 0 | 220 | 1.0 | 21.0 | 3 | 30 |
| 24 | FrootLoops | FL | 110 | 2 | 1 | 125 | 1.0 | 11.0 | 13 | 30 |
| 25 | FrostedFlakes | FF | 110 | 1 | 0 | 200 | 1.0 | 14.0 | 11 | 25 |
| 26 | FrostedMiniWheats | FMW | 100 | 3 | 0 | 0 | 3.0 | 14.0 | 7 | 100 |
| 27 | FruitfulBran | FB | 120 | 3 | 0 | 240 | 5.0 | 14.0 | 12 | 190 |
| 28 | JustRightCrunchyNuggets | JRCN | 110 | 2 | 1 | 170 | 1.0 | 17.0 | 6 | 60 |
| 29 | MueslixCrispyBlend | MCB | 160 | 3 | 2 | 150 | 3.0 | 17.0 | 13 | 160 |
| 30 | NutNHoneyCrunch | NNHC | 120 | 2 | 1 | 190 | 0.0 | 15.0 | 9 | 40 |
| 31 | NutriGrainAlmondRaisin | NGAR | 140 | 3 | 2 | 220 | 3.0 | 21.0 | 7 | 130 |
| 32 | NutriGrainWheat | NGW | 90 | 3 | 0 | 170 | 3.0 | 18.0 | 2 | 90 |
| 33 | Product19 | P19 | 100 | 3 | 0 | 320 | 1.0 | 20.0 | 3 | 45 |
| 34 | RaisinBran | RB | 120 | 3 | 1 | 210 | 5.0 | 14.0 | 12 | 240 |
| 35 | RiceKrispies | RK | 110 | 2 | 0 | 290 | 0.0 | 22.0 | 3 | 35 |
| 36 | Smacks | Smac | 110 | 2 | 1 | 70 | 1.0 | 9.0 | 15 | 40 |
| 37 | SpecialK | SK | 110 | 6 | 0 | 230 | 1.0 | 16.0 | 3 | 55 |
| 38 | CapNCrunch | CNC | 120 | 1 | 2 | 220 | 0.0 | 12.0 | 12 | 35 |
| 39 | HoneyGrahamOhs | HGO | 120 | 1 | 2 | 220 | 1.0 | 12.0 | 11 | 45 |
| 40 | Life | Life | 100 | 4 | 2 | 150 | 2.0 | 12.0 | 6 | 95 |
| 41 | PuffedRice | PR | 50 | 1 | 0 | 0 | 0.0 | 13.0 | 0 | 15 |
| 42 | PuffedWheat | PW | 50 | 2 | 0 | 0 | 1.0 | 10.0 | 0 | 50 |
| 43 | QuakerOatmeal | QO | 100 | 5 | 2 | 0 | 2.7 | 1.0 | 1 | 110 |
| Breakfast Cereals - J&W Table 11.9 |
| Summary Statistics |
| The MEANS Procedure |
| Variable | Mean | Std Dev | Minimum | Maximum |
| Calories | 107.9069767 | 18.9684123 | 50.0000000 | 160.0000000 |
| Protein | 2.4651163 | 1.2218027 | 1.0000000 | 6.0000000 |
| Fat | 0.9767442 | 0.8014384 | 0 | 3.0000000 |
| Sodium | 180.4651163 | 79.2134996 | 0 | 320.0000000 |
| Fiber | 1.7139535 | 1.7992831 | 0 | 9.0000000 |
| Carbohydrates | 14.2558140 | 4.2571678 | 1.0000000 | 22.0000000 |
| Sugar | 7.6046512 | 4.5363061 | 0 | 15.0000000 |
| Potassium | 84.4186047 | 66.1096609 | 15.0000000 | 320.0000000 |
| Breakfast Cereals - J&W Table 11.9 |
| Single Linkage Cluster Analysis |
| The CLUSTER Procedure |
| Single Linkage Cluster Analysis |
| Eigenvalues of the Covariance Matrix |
| | Eigenvalue | Difference | Proportion | Cumulative |
| 1 | 2.54756944 | 0.69280765 | 0.3184 | 0.3184 |
| 2 | 1.85476179 | 0.08593184 | 0.2318 | 0.5503 |
| 3 | 1.76882996 | 0.90054660 | 0.2211 | 0.7714 |
| 4 | 0.86828336 | 0.37098255 | 0.1085 | 0.8799 |
| 5 | 0.49730081 | 0.13987236 | 0.0622 | 0.9421 |
| 6 | 0.35742845 | 0.29692509 | 0.0447 | 0.9868 |
| 7 | 0.06050337 | 0.01518055 | 0.0076 | 0.9943 |
| 8 | 0.04532281 | | 0.0057 | 1.0000 |
| Root-Mean-Square Total-Sample Standard Deviation = 1 |
| Mean Distance Between Observations = 3.711894 |
| Cluster History |
| NCL | Clusters Joined | | FREQ | Norm Min Dist | Tie |
| 42 | Coco | CC | 2 | 0.0408 | |
| 41 | TWG | Chea | 2 | 0.0633 | |
| 40 | CNC | HGO | 2 | 0.1662 | |
| 39 | WHG | JRCN | 2 | 0.1689 | |
| 38 | CL42 | Trix | 3 | 0.2024 | |
| 37 | Kix | TCF | 2 | 0.2051 | |
| 36 | CF | RK | 2 | 0.2239 | |
| 35 | CL38 | LC | 4 | 0.2284 | |
| 34 | CL39 | NNHC | 3 | 0.2407 | |
| 33 | FL | Smac | 2 | 0.2584 | |
| 32 | CL36 | P19 | 3 | 0.2612 | |
| 31 | CL35 | CL33 | 6 | 0.2744 | |
| 30 | MGC | CL34 | 4 | 0.2833 | |
| 29 | CL32 | Cris | 4 | 0.2842 | |
| 28 | CL31 | CL30 | 10 | 0.3047 | |
| 27 | AJ | CP | 2 | 0.3075 | |
| 26 | RNB | Life | 2 | 0.3349 | |
| 25 | ACC | CL40 | 3 | 0.3403 | |
| 24 | CL28 | CL27 | 12 | 0.3414 | |
| 23 | CL24 | CL41 | 14 | 0.3436 | |
| 22 | PR | PW | 2 | 0.357 | |
| 21 | TRB | RB | 2 | 0.3571 | |
| 20 | CL37 | CL29 | 6 | 0.3575 | |
| 19 | CL23 | CL20 | 20 | 0.3729 | |
| 18 | CL19 | FF | 21 | 0.3846 | |
| 17 | CL18 | GG | 22 | 0.3954 | |
| 16 | CL17 | NGW | 23 | 0.3972 | |
| 15 | CL25 | CL16 | 26 | 0.4023 | |
| 14 | CL21 | FB | 3 | 0.4061 | |
| 13 | CL15 | HNC | 27 | 0.4362 | |
| 12 | CL26 | COB | 3 | 0.4413 | |
| 11 | CL13 | CL12 | 30 | 0.453 | |
| 10 | CL11 | ORC | 31 | 0.4561 | |
| 9 | CL14 | MCB | 4 | 0.5792 | |
| 8 | CL9 | NGAR | 5 | 0.586 | |
| 7 | CL10 | CL8 | 36 | 0.5865 | |
| 6 | CL7 | FMW | 37 | 0.713 | |
| 5 | Chee | SK | 2 | 0.7587 | |
| 4 | CL6 | CL5 | 39 | 0.7856 | |
| 3 | CL4 | QO | 40 | 0.9467 | |
| 2 | CL3 | CL22 | 42 | 0.9597 | |
| 1 | CL2 | AB | 43 | 1.1899 | |
| The TREE Procedure |
| Single Linkage Cluster Analysis |
| Breakfast Cereals - J&W Table 11.9 |
| Average Linkage Cluster Analysis |
| The CLUSTER Procedure |
| Average Linkage Cluster Analysis |
| Eigenvalues of the Covariance Matrix |
| | Eigenvalue | Difference | Proportion | Cumulative |
| 1 | 2.54756944 | 0.69280765 | 0.3184 | 0.3184 |
| 2 | 1.85476179 | 0.08593184 | 0.2318 | 0.5503 |
| 3 | 1.76882996 | 0.90054660 | 0.2211 | 0.7714 |
| 4 | 0.86828336 | 0.37098255 | 0.1085 | 0.8799 |
| 5 | 0.49730081 | 0.13987236 | 0.0622 | 0.9421 |
| 6 | 0.35742845 | 0.29692509 | 0.0447 | 0.9868 |
| 7 | 0.06050337 | 0.01518055 | 0.0076 | 0.9943 |
| 8 | 0.04532281 | | 0.0057 | 1.0000 |
| Root-Mean-Square Total-Sample Standard Deviation = 1 |
| Root-Mean-Square Distance Between Observations = 4 |
| Cluster History |
| NCL | Clusters Joined | | FREQ | Norm RMS Dist | Tie |
| 42 | Coco | CC | 2 | 0.0378 | |
| 41 | TWG | Chea | 2 | 0.0587 | |
| 40 | CNC | HGO | 2 | 0.1542 | |
| 39 | WHG | JRCN | 2 | 0.1567 | |
| 38 | Kix | TCF | 2 | 0.1903 | |
| 37 | CL42 | Trix | 3 | 0.2007 | |
| 36 | CF | RK | 2 | 0.2078 | |
| 35 | CL37 | LC | 4 | 0.2348 | |
| 34 | FL | Smac | 2 | 0.2398 | |
| 33 | CL39 | NNHC | 3 | 0.2621 | |
| 32 | CL36 | Cris | 3 | 0.266 | |
| 31 | AJ | CP | 2 | 0.2854 | |
| 30 | RNB | Life | 2 | 0.3107 | |
| 29 | MGC | CL41 | 3 | 0.3269 | |
| 28 | CL32 | P19 | 4 | 0.3301 | |
| 27 | PR | PW | 2 | 0.3313 | |
| 26 | TRB | RB | 2 | 0.3314 | |
| 25 | ACC | CL40 | 3 | 0.3576 | |
| 24 | CL31 | FF | 3 | 0.3768 | |
| 23 | CL35 | CL34 | 6 | 0.3906 | |
| 22 | GG | CL33 | 4 | 0.4086 | |
| 21 | CL38 | CL28 | 6 | 0.4258 | |
| 20 | CL29 | NGW | 4 | 0.433 | |
| 19 | CL26 | FB | 3 | 0.4424 | |
| 18 | CL23 | CL24 | 9 | 0.4498 | |
| 17 | CL22 | HNC | 5 | 0.489 | |
| 16 | CL30 | COB | 3 | 0.4898 | |
| 15 | CL25 | CL17 | 8 | 0.5279 | |
| 14 | MCB | NGAR | 2 | 0.5438 | |
| 13 | ORC | CL16 | 4 | 0.5461 | |
| 12 | CL15 | CL18 | 17 | 0.5643 | |
| 11 | CL21 | CL20 | 10 | 0.6476 | |
| 10 | Chee | SK | 2 | 0.7041 | |
| 9 | CL19 | CL14 | 5 | 0.7975 | |
| 8 | CL12 | CL13 | 21 | 0.8309 | |
| 7 | CL8 | CL11 | 31 | 0.8726 | |
| 6 | CL7 | FMW | 32 | 0.936 | |
| 5 | CL6 | CL9 | 37 | 1.031 | |
| 4 | CL5 | CL10 | 39 | 1.1179 | |
| 3 | CL4 | CL27 | 41 | 1.3636 | |
| 2 | CL3 | QO | 42 | 1.4124 | |
| 1 | CL2 | AB | 43 | 1.7291 | |
| The TREE Procedure |
| Average Linkage Cluster Analysis |
| Breakfast Cereals - J&W Table 11.9 |
| Complete Linkage Cluster Analysis |
| The CLUSTER Procedure |
| Complete Linkage Cluster Analysis |
| Eigenvalues of the Covariance Matrix |
| | Eigenvalue | Difference | Proportion | Cumulative |
| 1 | 2.54756944 | 0.69280765 | 0.3184 | 0.3184 |
| 2 | 1.85476179 | 0.08593184 | 0.2318 | 0.5503 |
| 3 | 1.76882996 | 0.90054660 | 0.2211 | 0.7714 |
| 4 | 0.86828336 | 0.37098255 | 0.1085 | 0.8799 |
| 5 | 0.49730081 | 0.13987236 | 0.0622 | 0.9421 |
| 6 | 0.35742845 | 0.29692509 | 0.0447 | 0.9868 |
| 7 | 0.06050337 | 0.01518055 | 0.0076 | 0.9943 |
| 8 | 0.04532281 | | 0.0057 | 1.0000 |
| Root-Mean-Square Total-Sample Standard Deviation = 1 |
| Mean Distance Between Observations = 3.711894 |
| Cluster History |
| NCL | Clusters Joined | | FREQ | Norm Max Dist | Tie |
| 42 | Coco | CC | 2 | 0.0408 | |
| 41 | TWG | Chea | 2 | 0.0633 | |
| 40 | CNC | HGO | 2 | 0.1662 | |
| 39 | WHG | JRCN | 2 | 0.1689 | |
| 38 | Kix | TCF | 2 | 0.2051 | |
| 37 | CF | RK | 2 | 0.2239 | |
| 36 | CL42 | Trix | 3 | 0.2294 | |
| 35 | FL | Smac | 2 | 0.2584 | |
| 34 | CL37 | Cris | 3 | 0.289 | |
| 33 | CL36 | LC | 4 | 0.2934 | |
| 32 | AJ | CP | 2 | 0.3075 | |
| 31 | CL39 | NNHC | 3 | 0.3188 | |
| 30 | RNB | Life | 2 | 0.3349 | |
| 29 | PR | PW | 2 | 0.357 | |
| 28 | TRB | RB | 2 | 0.3571 | |
| 27 | MGC | CL41 | 3 | 0.3607 | |
| 26 | ACC | CL40 | 3 | 0.4257 | |
| 25 | CL32 | FF | 3 | 0.4265 | |
| 24 | CL34 | P19 | 4 | 0.4384 | |
| 23 | GG | CL31 | 4 | 0.5123 | |
| 22 | CL33 | CL35 | 6 | 0.5211 | |
| 21 | CL28 | FB | 3 | 0.5382 | |
| 20 | HNC | ORC | 2 | 0.5465 | |
| 19 | CL27 | NGW | 4 | 0.5706 | |
| 18 | MCB | NGAR | 2 | 0.586 | |
| 17 | CL20 | CL30 | 4 | 0.6008 | |
| 16 | CL38 | CL24 | 6 | 0.6135 | |
| 15 | CL26 | CL23 | 7 | 0.686 | |
| 14 | CL22 | CL25 | 9 | 0.7193 | |
| 13 | Chee | SK | 2 | 0.7587 | |
| 12 | CL16 | CL19 | 10 | 0.8455 | |
| 11 | CL15 | CL14 | 16 | 0.9234 | |
| 10 | CL17 | COB | 5 | 0.9242 | |
| 9 | CL21 | CL18 | 5 | 1.0063 | |
| 8 | FMW | CL29 | 3 | 1.0942 | |
| 7 | CL10 | CL9 | 10 | 1.1586 | |
| 6 | CL13 | CL12 | 12 | 1.2309 | |
| 5 | CL11 | CL7 | 26 | 1.3754 | |
| 4 | CL8 | QO | 4 | 1.621 | |
| 3 | CL5 | CL6 | 38 | 1.6623 | |
| 2 | CL3 | CL4 | 42 | 2.1351 | |
| 1 | CL2 | AB | 43 | 2.2375 | |
| The TREE Procedure |
| Complete Linkage Cluster Analysis |
| Breakfast Cereals - J&W Table 11.9 |
| Ward's Method Cluster Analysis |
| The CLUSTER Procedure |
| Ward's Minimum Variance Cluster Analysis |
| Eigenvalues of the Covariance Matrix |
| | Eigenvalue | Difference | Proportion | Cumulative |
| 1 | 2.54756944 | 0.69280765 | 0.3184 | 0.3184 |
| 2 | 1.85476179 | 0.08593184 | 0.2318 | 0.5503 |
| 3 | 1.76882996 | 0.90054660 | 0.2211 | 0.7714 |
| 4 | 0.86828336 | 0.37098255 | 0.1085 | 0.8799 |
| 5 | 0.49730081 | 0.13987236 | 0.0622 | 0.9421 |
| 6 | 0.35742845 | 0.29692509 | 0.0447 | 0.9868 |
| 7 | 0.06050337 | 0.01518055 | 0.0076 | 0.9943 |
| 8 | 0.04532281 | | 0.0057 | 1.0000 |
| Root-Mean-Square Total-Sample Standard Deviation = 1 |
| Root-Mean-Square Distance Between Observations = 4 |
| Cluster History |
| NCL | Clusters Joined | | FREQ | SPRSQ | RSQ | Tie |
| 42 | Coco | CC | 2 | 0.0000 | 1.00 | |
| 41 | TWG | Chea | 2 | 0.0001 | 1.00 | |
| 40 | CNC | HGO | 2 | 0.0006 | .999 | |
| 39 | WHG | JRCN | 2 | 0.0006 | .999 | |
| 38 | Kix | TCF | 2 | 0.0009 | .998 | |
| 37 | CF | RK | 2 | 0.0010 | .997 | |
| 36 | CL42 | Trix | 3 | 0.0013 | .996 | |
| 35 | FL | Smac | 2 | 0.0014 | .994 | |
| 34 | CL36 | LC | 4 | 0.0016 | .993 | |
| 33 | CL37 | Cris | 3 | 0.0019 | .991 | |
| 32 | AJ | CP | 2 | 0.0019 | .989 | |
| 31 | CL39 | NNHC | 3 | 0.0020 | .987 | |
| 30 | RNB | Life | 2 | 0.0023 | .984 | |
| 29 | PR | PW | 2 | 0.0026 | .982 | |
| 28 | TRB | RB | 2 | 0.0026 | .979 | |
| 27 | CL33 | P19 | 4 | 0.0032 | .976 | |
| 26 | MGC | CL41 | 3 | 0.0034 | .973 | |
| 25 | CL32 | FF | 3 | 0.0039 | .969 | |
| 24 | ACC | CL40 | 3 | 0.0039 | .965 | |
| 23 | GG | CL31 | 4 | 0.0053 | .960 | |
| 22 | CL28 | FB | 3 | 0.0053 | .954 | |
| 21 | CL26 | NGW | 4 | 0.0058 | .948 | |
| 20 | HNC | ORC | 2 | 0.0061 | .942 | |
| 19 | CL30 | COB | 3 | 0.0068 | .935 | |
| 18 | MCB | NGAR | 2 | 0.0070 | .928 | |
| 17 | CL34 | CL35 | 6 | 0.0078 | .921 | |
| 16 | CL38 | CL27 | 6 | 0.0089 | .912 | |
| 15 | CL17 | CL25 | 9 | 0.0114 | .900 | |
| 14 | CL24 | CL20 | 5 | 0.0115 | .889 | |
| 13 | Chee | SK | 2 | 0.0118 | .877 | |
| 12 | CL14 | CL23 | 9 | 0.0164 | .861 | |
| 11 | CL21 | FMW | 5 | 0.0193 | .841 | |
| 10 | CL19 | QO | 4 | 0.0280 | .813 | |
| 9 | CL22 | CL18 | 5 | 0.0289 | .784 | |
| 8 | CL12 | CL15 | 18 | 0.0363 | .748 | |
| 7 | CL13 | CL11 | 7 | 0.0426 | .706 | |
| 6 | CL7 | CL16 | 13 | 0.0584 | .647 | |
| 5 | CL10 | AB | 5 | 0.0707 | .576 | |
| 4 | CL5 | CL9 | 10 | 0.0905 | .486 | |
| 3 | CL6 | CL29 | 15 | 0.1116 | .374 | |
| 2 | CL8 | CL3 | 33 | 0.1690 | .205 | |
| 1 | CL2 | CL4 | 43 | 0.2055 | .000 | |
| The TREE Procedure |
| Ward's Minimum Variance Cluster Analysis |
| Breakfast Cereals - J&W Table 11.9 |
| Ward's Method Cluster Analysis |
| Cereals Assigned To One of Three Clusters |
| Obs | Cl# | Brand | Label | Cal | Protein | Fat | Na | Fiber | Carbs | Sugar | K |
| 1 | 1 | ACCheerios | ACC | 110 | 2 | 2 | 180 | 1.5 | 10.5 | 10 | 70 |
| 2 | 1 | AppleJacks | AJ | 110 | 2 | 0 | 125 | 1.0 | 11.0 | 14 | 30 |
| 3 | 1 | CapNCrunch | CNC | 120 | 1 | 2 | 220 | 0.0 | 12.0 | 12 | 35 |
| 4 | 1 | CocoaPuffs | Coco | 110 | 1 | 1 | 180 | 0.0 | 12.0 | 13 | 55 |
| 5 | 1 | CornPops | CP | 110 | 1 | 0 | 90 | 1.0 | 13.0 | 12 | 20 |
| 6 | 1 | CountChocula | CC | 110 | 1 | 1 | 180 | 0.0 | 12.0 | 13 | 65 |
| 7 | 1 | FrootLoops | FL | 110 | 2 | 1 | 125 | 1.0 | 11.0 | 13 | 30 |
| 8 | 1 | FrostedFlakes | FF | 110 | 1 | 0 | 200 | 1.0 | 14.0 | 11 | 25 |
| 9 | 1 | GoldenGrahams | GG | 110 | 1 | 1 | 280 | 0.0 | 15.0 | 9 | 45 |
| 10 | 1 | HoneyGrahamOhs | HGO | 120 | 1 | 2 | 220 | 1.0 | 12.0 | 11 | 45 |
| 11 | 1 | HoneyNutCheerios | HNC | 110 | 3 | 1 | 250 | 1.5 | 11.5 | 10 | 90 |
| 12 | 1 | JustRightCrunchyNuggets | JRCN | 110 | 2 | 1 | 170 | 1.0 | 17.0 | 6 | 60 |
| 13 | 1 | LuckyCharms | LC | 110 | 2 | 1 | 180 | 0.0 | 12.0 | 12 | 55 |
| 14 | 1 | NutNHoneyCrunch | NNHC | 120 | 2 | 1 | 190 | 0.0 | 15.0 | 9 | 40 |
| 15 | 1 | OatmealRaisinCrisp | ORC | 130 | 3 | 2 | 170 | 1.5 | 13.5 | 10 | 120 |
| 16 | 1 | Smacks | Smac | 110 | 2 | 1 | 70 | 1.0 | 9.0 | 15 | 40 |
| 17 | 1 | Trix | Trix | 110 | 1 | 1 | 140 | 0.0 | 13.0 | 12 | 25 |
| 18 | 1 | WheatiesHoneyGold | WHG | 110 | 2 | 1 | 200 | 1.0 | 16.0 | 8 | 60 |
| 19 | 2 | Cheaties | Chea | 100 | 3 | 1 | 200 | 3.0 | 17.0 | 3 | 110 |
| 20 | 2 | Cheerios | Chee | 110 | 6 | 2 | 290 | 2.0 | 17.0 | 1 | 105 |
| 21 | 2 | CornFlakes | CF | 100 | 2 | 0 | 290 | 1.0 | 21.0 | 2 | 35 |
| 22 | 2 | Crispix | Cris | 110 | 2 | 0 | 220 | 1.0 | 21.0 | 3 | 30 |
| 23 | 2 | FrostedMiniWheats | FMW | 100 | 3 | 0 | 0 | 3.0 | 14.0 | 7 | 100 |
| 24 | 2 | Kix | Kix | 110 | 2 | 1 | 260 | 0.0 | 21.0 | 3 | 40 |
| 25 | 2 | MultiGrainCheerios | MGC | 100 | 2 | 1 | 220 | 2.0 | 15.0 | 6 | 90 |
| 26 | 2 | NutriGrainWheat | NGW | 90 | 3 | 0 | 170 | 3.0 | 18.0 | 2 | 90 |
| 27 | 2 | Product19 | P19 | 100 | 3 | 0 | 320 | 1.0 | 20.0 | 3 | 45 |
| 28 | 2 | PuffedRice | PR | 50 | 1 | 0 | 0 | 0.0 | 13.0 | 0 | 15 |
| 29 | 2 | PuffedWheat | PW | 50 | 2 | 0 | 0 | 1.0 | 10.0 | 0 | 50 |
| 30 | 2 | RiceKrispies | RK | 110 | 2 | 0 | 290 | 0.0 | 22.0 | 3 | 35 |
| 31 | 2 | SpecialK | SK | 110 | 6 | 0 | 230 | 1.0 | 16.0 | 3 | 55 |
| 32 | 2 | TotalCornFlakes | TCF | 110 | 2 | 1 | 200 | 0.0 | 21.0 | 3 | 35 |
| 33 | 2 | TotalWholeGrain | TWG | 100 | 3 | 1 | 200 | 3.0 | 16.0 | 3 | 110 |
| 34 | 3 | AllBran | AB | 70 | 4 | 1 | 260 | 9.0 | 7.0 | 5 | 320 |
| 35 | 3 | CracklinOatBran | COB | 110 | 3 | 3 | 140 | 4.0 | 10.0 | 7 | 160 |
| 36 | 3 | FruitfulBran | FB | 120 | 3 | 0 | 240 | 5.0 | 14.0 | 12 | 190 |
| 37 | 3 | Life | Life | 100 | 4 | 2 | 150 | 2.0 | 12.0 | 6 | 95 |
| 38 | 3 | MueslixCrispyBlend | MCB | 160 | 3 | 2 | 150 | 3.0 | 17.0 | 13 | 160 |
| 39 | 3 | NutriGrainAlmondRaisin | NGAR | 140 | 3 | 2 | 220 | 3.0 | 21.0 | 7 | 130 |
| 40 | 3 | QuakerOatmeal | QO | 100 | 5 | 2 | 0 | 2.7 | 1.0 | 1 | 110 |
| 41 | 3 | RaisinBran | RB | 120 | 3 | 1 | 210 | 5.0 | 14.0 | 12 | 240 |
| 42 | 3 | RaisinNutBran | RNB | 100 | 3 | 2 | 140 | 2.5 | 10.5 | 8 | 140 |
| 43 | 3 | TotalRaisinBran | TRB | 140 | 3 | 1 | 190 | 4.0 | 15.0 | 14 | 230 |
| Breakfast Cereals - J&W Table 11.9 |
| Principal Components Analysis |
| The PRINCOMP Procedure |
| Observations | 43 |
| Variables | 8 |
| Simple Statistics |
| | Calories | Protein | Fat | Sodium | Fiber | Carbohydrates | Sugar | Potassium |
| Mean | 107.9069767 | 2.465116279 | 0.9767441860 | 180.4651163 | 1.713953488 | 14.25581395 | 7.604651163 | 84.41860465 |
| StD | 18.9684123 | 1.221802673 | 0.8014383526 | 79.2134996 | 1.799283111 | 4.25716776 | 4.536306106 | 66.10966090 |
| Correlation Matrix |
| | Calories | Protein | Fat | Sodium | Fiber | Carbohydrates | Sugar | Potassium |
| Calories | 1.0000 | 0.0327 | 0.3883 | 0.3366 | -.0194 | 0.2559 | 0.5795 | 0.1414 |
| Protein | 0.0327 | 1.0000 | 0.2058 | 0.0937 | 0.5125 | -.0761 | -.3999 | 0.5001 |
| Fat | 0.3883 | 0.2058 | 1.0000 | 0.0096 | 0.1637 | -.3227 | 0.1873 | 0.3121 |
| Sodium | 0.3366 | 0.0937 | 0.0096 | 1.0000 | 0.0434 | 0.5673 | -.0488 | 0.1144 |
| Fiber | -.0194 | 0.5125 | 0.1637 | 0.0434 | 1.0000 | -.2408 | -.0343 | 0.9288 |
| Carbohydrates | 0.2559 | -.0761 | -.3227 | 0.5673 | -.2408 | 1.0000 | -.3152 | -.2232 |
| Sugar | 0.5795 | -.3999 | 0.1873 | -.0488 | -.0343 | -.3152 | 1.0000 | 0.0814 |
| Potassium | 0.1414 | 0.5001 | 0.3121 | 0.1144 | 0.9288 | -.2232 | 0.0814 | 1.0000 |
| Eigenvalues of the Correlation Matrix |
| | Eigenvalue | Difference | Proportion | Cumulative |
| 1 | 2.54756944 | 0.69280765 | 0.3184 | 0.3184 |
| 2 | 1.85476179 | 0.08593184 | 0.2318 | 0.5503 |
| 3 | 1.76882996 | 0.90054660 | 0.2211 | 0.7714 |
| 4 | 0.86828336 | 0.37098255 | 0.1085 | 0.8799 |
| 5 | 0.49730081 | 0.13987236 | 0.0622 | 0.9421 |
| 6 | 0.35742845 | 0.29692509 | 0.0447 | 0.9868 |
| 7 | 0.06050337 | 0.01518055 | 0.0076 | 0.9943 |
| 8 | 0.04532281 | | 0.0057 | 1.0000 |
| Eigenvectors |
| | Prin1 | Prin2 | Prin3 | Prin4 | Prin5 | Prin6 | Prin7 | Prin8 |
| Calories | 0.114288 | 0.656598 | 0.135123 | -.100479 | 0.453703 | 0.135781 | -.468935 | -.288583 |
| Protein | 0.420836 | -.227941 | 0.253063 | -.371677 | 0.551363 | -.420374 | 0.242315 | 0.171241 |
| Fat | 0.315690 | 0.301787 | -.165646 | -.687460 | -.431966 | 0.251310 | 0.125059 | 0.209366 |
| Sodium | 0.022537 | 0.273076 | 0.591711 | 0.075408 | -.501848 | -.555796 | -.086933 | -.028197 |
| Fiber | 0.558123 | -.134670 | 0.069649 | 0.376351 | -.075699 | 0.194259 | -.452959 | 0.524626 |
| Carbohydrates | -.238093 | 0.113458 | 0.631592 | 0.058778 | 0.131506 | 0.534840 | 0.369793 | 0.296615 |
| Sugar | 0.038335 | 0.565922 | -.362068 | 0.374915 | 0.123791 | -.253640 | 0.435076 | 0.371024 |
| Potassium | 0.583102 | -.000102 | 0.072717 | 0.296338 | -.115579 | 0.211327 | 0.408627 | -.584717 |