PCA and Biplots of European Protein Consumption
dm "output;clear;log;clear";
*---------------------------------------------------------*
| PROTEIN.SAS -- Principal Components Analysis and |
| Biplot of Protein Consumption by Source for Europeans |
*---------------------------------------------------------*;
Options PS=55 LS=80 PageNo=1 NoDate MPrint
FORMCHAR='|----|+|---+=|-/\<>*';
GOptions Reset=ALL TargetDevice=PDF
NoPrompt HText=1 FText=Swiss HTitle=1 FTitle=Swiss;
/*
* PDF code
*
ODS Listing Close;
Filename GSASFile Dummy;
GOptions Device=PDF FText=Helvetica FTitle=Helvetica;
Options TopMargin=1 BottomMargin=1 LeftMargin=1.0 RightMargin=1.0;
ODS PDF File="Protein.pdf";
*/
/*
* HTML code
* Remove length specifications in axes statements.
*
ODS Listing Close;
ODS HTML body="Protein.html"
headtext="PCA On Protein Consumption"
gpath="Protein"
anchor="Protein";
GOptions Device=GIF Transparency NoBorder
HText=1 FText=Swiss HTitle=1 FTitle=Swiss;
*/
Title1 'European Protein Consumption';
Data Protein;
Length Country $15;
Input Country Meat Pigpl Eggs Milk Fish Cereal Starch Nuts Fruveg;
** Units of Protein are grams/head/day **;
Abbrev=substr(country,1,3); /* 3 letter abbreviation */
Datalines;
Albania 10.1 1.4 0.5 8.9 0.2 42.3 0.6 5.5 1.7
Austria 8.9 14 4.3 19.9 2.1 28 3.6 1.3 4.3
Belg._Luxem. 13.5 9.3 4.1 17.5 4.5 26.6 5.7 2.1 4
Bulgaria 7.8 6 1.6 8.3 1.2 56.7 1.1 3.7 4.2
Czechoslovakia 9.7 11.4 2.8 12.5 2 34.3 5 1.1 4
Denmark 10.6 10.8 3.7 25 9.9 21.9 4.8 0.7 2.4
East_Germany 8.4 11.6 3.7 11.1 5.4 24.6 6.5 0.8 3.6
Finland 9.5 4.9 2.7 33.7 5.8 26.3 5.1 1 1.4
France 18 9.9 3.3 19.5 5.7 28.1 4.8 2.4 6.5
Greece 10.2 3 2.8 17.6 5.9 41.7 2.2 7.8 6.5
Hungary 5.3 12.4 2.9 9.7 0.3 40.1 4 5.4 4.2
Ireland 13.9 10 4.7 25.8 2.2 24 6.2 1.6 2.9
Italy 9 5.1 2.9 13.7 3.4 36.8 2.1 4.3 6.7
Netherlands 9.5 13.6 3.6 23.4 2.5 22.4 4.2 1.8 3.7
Norway 9.4 4.7 2.7 23.3 9.7 23 4.6 1.6 2.7
Poland 6.9 10.2 2.7 19.3 3 36.1 5.9 2 6.6
Portugal 6.2 3.7 1.1 4.9 14.2 27 5.9 4.7 7.9
Rumania 6.2 6.3 1.5 11.1 1 49.6 3.1 5.3 2.8
Spain 7.1 3.4 3.1 8.6 7 29.2 5.7 5.9 7.2
Sweden 9.9 7.8 3.5 24.7 7.5 19.5 3.7 1.4 2
Switzerland 13.1 10.1 3.1 23.8 2.3 25.6 2.8 2.4 4.9
United_Kingdom 17.4 5.7 4.7 20.6 4.3 24.3 4.7 3.4 3.3
USSR 9.3 4.6 2.1 16.6 3 43.6 6.4 3.4 2.9
West_Germany 11.4 12.5 4.1 18.8 3.4 18.6 5.2 1.5 3.8
Yugoslavia 4.4 5 1.2 9.5 0.6 55.9 3 5.7 3.2
;
Proc Sort Data=Protein;
By Country;
Run;
Proc Print Data=Protein;
Run;
Title2 "PCA on the Protein Variables";
Proc PrinComp Data=Protein Cov Out=Comp1;
Var Meat--Fruveg;
Run;
Proc GPlot Data=Comp1;
Plot Prin1*Prin2=1 / HRef=0 VRef=0 VAxis=Axis1 HAxis=Axis2;
Axis1 Label=(A=90 "Principal Component 1")
Order=(-20 To 30 By 10)
Length=5.7in;
Axis2 Label=("Principal Component 2")
Order=(-20 To 30 By 10)
Length=5.7in;
Symbol1 C=Black V=Dot H=0.7 I=None PointLabel=(C=Black "#Country");
Run; Quit;
%AnnoMac; /* pull in annotate macros */
%Macro Bplot(Alpha=1/2);
/* do the biplot computations directly */
Proc Iml;
Reset Nolog NoPrint;
Use Protein;
Read All Var{Meat Pigpl Eggs Milk Fish Cereal Starch Nuts Fruveg}
Into X;
Close Protein;
N=NRow(X);
One=J(n,1,1);
Xbar=(X`*One)/n; /* Centroid */
Y=X-One*Xbar`; /* Centered Data */
Call SVD(u,l,v,Y);
%If "&Alpha"="0" %Then
%Do;
Title2 "Biplot Analysis -- Alpha=0 With n-1 Scaling";
E=diag(l);
G=U*sqrt(n-1);
H=V*E*(1/sqrt(n-1));
%End;
%Else %If "&Alpha"="1" %Then
%Do;
Title2 "Biplot Analysis -- Alpha=1";
E=diag(l);
G=U*E;
H=V;
%End;
%Else
%Do;
Title2 "Biplot Analysis -- Alpha=1/2";
E=diag(sqrt(l));
G=U*E;
H=V*E;
%End;
Create Obs From G;
Append From G;
Close Obs;
Create Vars From H;
Append From H;
Close Vars;
Quit;
Data Obs;
Merge Obs Protein;
Run;
Proc Format;
Value Vars
1="meat" 2="pigpl" 3="eggs" 4="milk" 5="fish"
6="cereal" 7="starch" 8="nuts" 9="fruveg";
Run;
Data Vars;
Length Country $15 Abbrev $6;
Set Vars;
Country=Put(_N_,Vars.);
Abbrev=Put(_N_,Vars.);
Run;
Data Both;
Set Vars Obs;
Rename Col1=Prin1 Col2=Prin2 Col3=Prin3;
Run;
Data AnnoVectors;
%Dclanno;
%System(2,2,4);
%Move(0,0);
Set Vars;
%Draw(Col2,Col1,BLACK,1,1);
Run;
GOptions Reset=Symbol Reset=Axis;
Proc GPlot Data=Both Annotate=AnnoVectors;
Plot Prin1*Prin2=1
Prin1*Prin1=2 Prin2*Prin2=2 /* trick to get axes the same */
/ Overlay HRef=0 VRef=0 VAxis=Axis1 HAxis=Axis2;
Axis1 Label=(A=90 "Principal Component 1")
Length=5.7in;
Axis2 Label=("Principal Component 2")
Length=5.7in;
Symbol1 C=Black V=Dot H=0.7 I=None PointLabel=(C=Black "#Country");
Symbol2 C=Black V=None I=None;
Run; Quit;
%Mend BPlot;
%BPlot(Alpha=0);
%BPlot(Alpha=1);
%BPlot(Alpha=1/2);
*ODS PDF Close;
*ODS HTML Close;
ODS Listing;
| European Protein Consumption |
| Obs |
Country |
Meat |
Pigpl |
Eggs |
Milk |
Fish |
Cereal |
Starch |
Nuts |
Fruveg |
Abbrev |
| 1 |
Albania |
10.1 |
1.4 |
0.5 |
8.9 |
0.2 |
42.3 |
0.6 |
5.5 |
1.7 |
Alb |
| 2 |
Austria |
8.9 |
14.0 |
4.3 |
19.9 |
2.1 |
28.0 |
3.6 |
1.3 |
4.3 |
Aus |
| 3 |
Belg._Luxem. |
13.5 |
9.3 |
4.1 |
17.5 |
4.5 |
26.6 |
5.7 |
2.1 |
4.0 |
Bel |
| 4 |
Bulgaria |
7.8 |
6.0 |
1.6 |
8.3 |
1.2 |
56.7 |
1.1 |
3.7 |
4.2 |
Bul |
| 5 |
Czechoslovakia |
9.7 |
11.4 |
2.8 |
12.5 |
2.0 |
34.3 |
5.0 |
1.1 |
4.0 |
Cze |
| 6 |
Denmark |
10.6 |
10.8 |
3.7 |
25.0 |
9.9 |
21.9 |
4.8 |
0.7 |
2.4 |
Den |
| 7 |
East_Germany |
8.4 |
11.6 |
3.7 |
11.1 |
5.4 |
24.6 |
6.5 |
0.8 |
3.6 |
Eas |
| 8 |
Finland |
9.5 |
4.9 |
2.7 |
33.7 |
5.8 |
26.3 |
5.1 |
1.0 |
1.4 |
Fin |
| 9 |
France |
18.0 |
9.9 |
3.3 |
19.5 |
5.7 |
28.1 |
4.8 |
2.4 |
6.5 |
Fra |
| 10 |
Greece |
10.2 |
3.0 |
2.8 |
17.6 |
5.9 |
41.7 |
2.2 |
7.8 |
6.5 |
Gre |
| 11 |
Hungary |
5.3 |
12.4 |
2.9 |
9.7 |
0.3 |
40.1 |
4.0 |
5.4 |
4.2 |
Hun |
| 12 |
Ireland |
13.9 |
10.0 |
4.7 |
25.8 |
2.2 |
24.0 |
6.2 |
1.6 |
2.9 |
Ire |
| 13 |
Italy |
9.0 |
5.1 |
2.9 |
13.7 |
3.4 |
36.8 |
2.1 |
4.3 |
6.7 |
Ita |
| 14 |
Netherlands |
9.5 |
13.6 |
3.6 |
23.4 |
2.5 |
22.4 |
4.2 |
1.8 |
3.7 |
Net |
| 15 |
Norway |
9.4 |
4.7 |
2.7 |
23.3 |
9.7 |
23.0 |
4.6 |
1.6 |
2.7 |
Nor |
| 16 |
Poland |
6.9 |
10.2 |
2.7 |
19.3 |
3.0 |
36.1 |
5.9 |
2.0 |
6.6 |
Pol |
| 17 |
Portugal |
6.2 |
3.7 |
1.1 |
4.9 |
14.2 |
27.0 |
5.9 |
4.7 |
7.9 |
Por |
| 18 |
Rumania |
6.2 |
6.3 |
1.5 |
11.1 |
1.0 |
49.6 |
3.1 |
5.3 |
2.8 |
Rum |
| 19 |
Spain |
7.1 |
3.4 |
3.1 |
8.6 |
7.0 |
29.2 |
5.7 |
5.9 |
7.2 |
Spa |
| 20 |
Sweden |
9.9 |
7.8 |
3.5 |
24.7 |
7.5 |
19.5 |
3.7 |
1.4 |
2.0 |
Swe |
| 21 |
Switzerland |
13.1 |
10.1 |
3.1 |
23.8 |
2.3 |
25.6 |
2.8 |
2.4 |
4.9 |
Swi |
| 22 |
USSR |
9.3 |
4.6 |
2.1 |
16.6 |
3.0 |
43.6 |
6.4 |
3.4 |
2.9 |
USS |
| 23 |
United_Kingdom |
17.4 |
5.7 |
4.7 |
20.6 |
4.3 |
24.3 |
4.7 |
3.4 |
3.3 |
Uni |
| 24 |
West_Germany |
11.4 |
12.5 |
4.1 |
18.8 |
3.4 |
18.6 |
5.2 |
1.5 |
3.8 |
Wes |
| 25 |
Yugoslavia |
4.4 |
5.0 |
1.2 |
9.5 |
0.6 |
55.9 |
3.0 |
5.7 |
3.2 |
Yug |
| European Protein Consumption |
| PCA on the Protein Variables |
| Observations |
25 |
| Variables |
9 |
| Simple Statistics |
| |
Meat |
Pigpl |
Eggs |
Milk |
Fish |
Cereal |
Starch |
Nuts |
Fruveg |
| Mean |
9.828000000 |
7.896000000 |
2.936000000 |
17.11200000 |
4.284000000 |
32.24800000 |
4.276000000 |
3.072000000 |
4.136000000 |
| StD |
3.347078328 |
3.694080851 |
1.117616511 |
7.10541577 |
3.402533370 |
10.97478625 |
1.634084861 |
1.985682083 |
1.803903176 |
| Covariance Matrix |
| |
Meat |
Pigpl |
Eggs |
Milk |
Fish |
Cereal |
Starch |
Nuts |
Fruveg |
| Meat |
11.2029333 |
1.8917833 |
2.1906167 |
11.9609000 |
0.6942167 |
-18.3622333 |
0.7407000 |
-2.3225167 |
-0.4481333 |
| Pigpl |
1.8917833 |
13.6462333 |
2.5614000 |
7.3883833 |
-2.9413167 |
-16.7760500 |
1.8940667 |
-4.6576167 |
-0.4086000 |
| Eggs |
2.1906167 |
2.5614000 |
1.2490667 |
4.5703833 |
0.2493500 |
-8.7384667 |
0.8259000 |
-1.2422833 |
-0.0917667 |
| Milk |
11.9609000 |
7.3883833 |
4.5703833 |
50.4869333 |
3.3335333 |
-46.2218500 |
2.5823833 |
-8.7629833 |
-5.2342000 |
| Fish |
0.6942167 |
-2.9413167 |
0.2493500 |
3.3335333 |
11.5772333 |
-19.5758667 |
2.2454333 |
-0.9942167 |
1.6335167 |
| Cereal |
-18.3622333 |
-16.7760500 |
-8.7384667 |
-46.2218500 |
-19.5758667 |
120.4459333 |
-9.5633833 |
14.1868167 |
0.9215333 |
| Starch |
0.7407000 |
1.8940667 |
0.8259000 |
2.5823833 |
2.2454333 |
-9.5633833 |
2.6702333 |
-1.5390333 |
0.2488167 |
| Nuts |
-2.3225167 |
-4.6576167 |
-1.2422833 |
-8.7629833 |
-0.9942167 |
14.1868167 |
-1.5390333 |
3.9429333 |
1.3431333 |
| Fruveg |
-0.4481333 |
-0.4086000 |
-0.0917667 |
-5.2342000 |
1.6335167 |
0.9215333 |
0.2488167 |
1.3431333 |
3.2540667 |
| Total Variance |
218.47556667 |
| Eigenvalues of the Covariance Matrix |
| |
Eigenvalue |
Difference |
Proportion |
Cumulative |
| 1 |
155.233995 |
124.535258 |
0.7105 |
0.7105 |
| 2 |
30.698737 |
15.055280 |
0.1405 |
0.8510 |
| 3 |
15.643457 |
7.326526 |
0.0716 |
0.9226 |
| 4 |
8.316930 |
4.687009 |
0.0381 |
0.9607 |
| 5 |
3.629922 |
1.201130 |
0.0166 |
0.9773 |
| 6 |
2.428791 |
0.871993 |
0.0111 |
0.9884 |
| 7 |
1.556798 |
0.842000 |
0.0071 |
0.9956 |
| 8 |
0.714799 |
0.462660 |
0.0033 |
0.9988 |
| 9 |
0.252139 |
|
0.0012 |
1.0000 |
| Eigenvectors |
| |
Prin1 |
Prin2 |
Prin3 |
Prin4 |
Prin5 |
Prin6 |
Prin7 |
Prin8 |
Prin9 |
| Meat |
-.150654 |
0.132695 |
0.031837 |
0.896051 |
0.288803 |
-.229066 |
-.079658 |
0.065941 |
-.095932 |
| Pigpl |
-.129489 |
0.043435 |
-.798384 |
-.185340 |
0.399751 |
0.000044 |
-.246275 |
0.260528 |
-.145780 |
| Eggs |
-.067271 |
0.020946 |
-.098093 |
0.075836 |
0.078775 |
0.069341 |
0.141591 |
0.255310 |
0.939903 |
| Milk |
-.425376 |
0.830856 |
0.219644 |
-.203645 |
0.117006 |
0.149439 |
0.036591 |
0.011455 |
-.039062 |
| Fish |
-.126976 |
-.292307 |
0.522388 |
-.285108 |
0.558874 |
-.256157 |
-.335867 |
0.228545 |
0.035522 |
| Cereal |
0.860865 |
0.406169 |
0.038196 |
-.034500 |
0.274384 |
-.113864 |
-.018972 |
0.035042 |
0.038075 |
| Starch |
-.066851 |
-.076049 |
-.034323 |
-.111867 |
0.240940 |
-.327019 |
0.871248 |
0.155131 |
-.167101 |
| Nuts |
0.113909 |
-.070066 |
0.166391 |
0.122902 |
-.084102 |
0.564564 |
0.083254 |
0.743500 |
-.231940 |
| Fruveg |
0.020235 |
-.169221 |
0.022178 |
0.073645 |
0.533053 |
0.645085 |
0.180124 |
-.481814 |
0.013067 |