PCA and Biplots of European Protein Consumption

dm "output;clear;log;clear";
*---------------------------------------------------------*
| PROTEIN.SAS -- Principal Components Analysis and        |
| Biplot of Protein Consumption by Source for Europeans   |
*---------------------------------------------------------*;
Options PS=55 LS=80 PageNo=1 NoDate MPrint
        FORMCHAR='|----|+|---+=|-/\<>*';
GOptions Reset=ALL TargetDevice=PDF
    NoPrompt HText=1 FText=Swiss HTitle=1 FTitle=Swiss;

/*
 * PDF code
 *
ODS Listing Close;
Filename GSASFile Dummy;
GOptions Device=PDF FText=Helvetica FTitle=Helvetica;
Options TopMargin=1 BottomMargin=1 LeftMargin=1.0 RightMargin=1.0;
ODS PDF File="Protein.pdf";
 */

/*
 * HTML code
 * Remove length specifications in axes statements.
 *
ODS Listing Close;
ODS HTML body="Protein.html" 
         headtext="PCA On Protein Consumption"
         gpath="Protein"
         anchor="Protein";
GOptions Device=GIF Transparency NoBorder
         HText=1 FText=Swiss HTitle=1 FTitle=Swiss;
 */

Title1 'European Protein Consumption';

Data Protein;
 Length Country $15;
 Input Country Meat Pigpl Eggs Milk Fish Cereal Starch Nuts Fruveg;
 ** Units of Protein are grams/head/day **;
 Abbrev=substr(country,1,3); /* 3 letter abbreviation */
Datalines;
Albania 10.1 1.4 0.5 8.9 0.2 42.3 0.6 5.5 1.7
Austria 8.9 14 4.3 19.9 2.1 28 3.6 1.3 4.3
Belg._Luxem. 13.5 9.3 4.1 17.5 4.5 26.6 5.7 2.1 4
Bulgaria 7.8 6 1.6 8.3 1.2 56.7 1.1 3.7 4.2
Czechoslovakia 9.7 11.4 2.8 12.5 2 34.3 5 1.1 4
Denmark 10.6 10.8 3.7 25 9.9 21.9 4.8 0.7 2.4
East_Germany 8.4 11.6 3.7 11.1 5.4 24.6 6.5 0.8 3.6
Finland 9.5 4.9 2.7 33.7 5.8 26.3 5.1 1 1.4
France 18 9.9 3.3 19.5 5.7 28.1 4.8 2.4 6.5
Greece 10.2 3 2.8 17.6 5.9 41.7 2.2 7.8 6.5
Hungary 5.3 12.4 2.9 9.7 0.3 40.1 4 5.4 4.2
Ireland 13.9 10 4.7 25.8 2.2 24 6.2 1.6 2.9
Italy 9 5.1 2.9 13.7 3.4 36.8 2.1 4.3 6.7
Netherlands 9.5 13.6 3.6 23.4 2.5 22.4 4.2 1.8 3.7
Norway 9.4 4.7 2.7 23.3 9.7 23 4.6 1.6 2.7
Poland 6.9 10.2 2.7 19.3 3 36.1 5.9 2 6.6
Portugal 6.2 3.7 1.1 4.9 14.2 27 5.9 4.7 7.9
Rumania 6.2 6.3 1.5 11.1 1 49.6 3.1 5.3 2.8
Spain 7.1 3.4 3.1 8.6 7 29.2 5.7 5.9 7.2
Sweden 9.9 7.8 3.5 24.7 7.5 19.5 3.7 1.4 2
Switzerland 13.1 10.1 3.1 23.8 2.3 25.6 2.8 2.4 4.9
United_Kingdom 17.4 5.7 4.7 20.6 4.3 24.3 4.7 3.4 3.3
USSR 9.3 4.6 2.1 16.6 3 43.6 6.4 3.4 2.9
West_Germany 11.4 12.5 4.1 18.8 3.4 18.6 5.2 1.5 3.8
Yugoslavia 4.4 5 1.2 9.5 0.6 55.9 3 5.7 3.2
;

Proc Sort Data=Protein;
 By Country;
Run;
 
Proc Print Data=Protein;
Run;

Title2 "PCA on the Protein Variables";
Proc PrinComp Data=Protein Cov Out=Comp1;
 Var Meat--Fruveg;
Run;

Proc GPlot Data=Comp1;
 Plot Prin1*Prin2=1 / HRef=0 VRef=0 VAxis=Axis1 HAxis=Axis2;
 Axis1 Label=(A=90 "Principal Component 1")
       Order=(-20 To 30 By 10)
	   Length=5.7in;
 Axis2 Label=("Principal Component 2")
       Order=(-20 To 30 By 10)
	   Length=5.7in;
 Symbol1 C=Black V=Dot H=0.7 I=None PointLabel=(C=Black "#Country");
Run; Quit;

%AnnoMac; /* pull in annotate macros */ 
%Macro Bplot(Alpha=1/2);
/* do the biplot computations directly */
Proc Iml;
 Reset Nolog NoPrint;
 Use Protein;
 Read All Var{Meat Pigpl Eggs Milk Fish Cereal Starch Nuts Fruveg}
      Into X;
 Close Protein;
 N=NRow(X);
 One=J(n,1,1);
 Xbar=(X`*One)/n;   /* Centroid */
 Y=X-One*Xbar`;     /* Centered Data */
 Call SVD(u,l,v,Y);
 %If "&Alpha"="0" %Then
  %Do;
    Title2 "Biplot Analysis -- Alpha=0 With n-1 Scaling";
    E=diag(l);
    G=U*sqrt(n-1);
    H=V*E*(1/sqrt(n-1));
  %End;
 %Else %If "&Alpha"="1" %Then
  %Do;
    Title2 "Biplot Analysis -- Alpha=1";
    E=diag(l);
    G=U*E;
    H=V;
  %End;
 %Else
  %Do;
    Title2 "Biplot Analysis -- Alpha=1/2";
    E=diag(sqrt(l));
    G=U*E;
    H=V*E;
  %End;
 Create Obs From G;
 Append From G;
 Close Obs;
 Create Vars From H;
 Append From H;
 Close Vars;
Quit;

Data Obs;
 Merge Obs Protein;
Run;

Proc Format;
 Value Vars
  1="meat" 2="pigpl" 3="eggs" 4="milk" 5="fish"
  6="cereal" 7="starch" 8="nuts" 9="fruveg";
Run;

Data Vars;
 Length Country $15 Abbrev $6;
 Set Vars;
 Country=Put(_N_,Vars.);
 Abbrev=Put(_N_,Vars.);
Run;

Data Both;
 Set Vars Obs;
 Rename Col1=Prin1 Col2=Prin2 Col3=Prin3;
Run;

Data AnnoVectors;
 %Dclanno;
 %System(2,2,4);
 %Move(0,0);
 Set Vars;
 %Draw(Col2,Col1,BLACK,1,1);
Run;

GOptions Reset=Symbol Reset=Axis;
Proc GPlot Data=Both Annotate=AnnoVectors;
 Plot Prin1*Prin2=1 
      Prin1*Prin1=2 Prin2*Prin2=2 /* trick to get axes the same */
   / Overlay HRef=0 VRef=0 VAxis=Axis1 HAxis=Axis2;
 Axis1 Label=(A=90 "Principal Component 1")
	   Length=5.7in;
 Axis2 Label=("Principal Component 2")
	   Length=5.7in;
 Symbol1 C=Black V=Dot H=0.7 I=None PointLabel=(C=Black "#Country");
 Symbol2 C=Black V=None I=None;
Run; Quit;
%Mend BPlot;

%BPlot(Alpha=0);
%BPlot(Alpha=1);
%BPlot(Alpha=1/2);

*ODS PDF Close;
*ODS HTML Close;
ODS Listing;


 
European Protein Consumption

Obs Country Meat Pigpl Eggs Milk Fish Cereal Starch Nuts Fruveg Abbrev
1 Albania 10.1 1.4 0.5 8.9 0.2 42.3 0.6 5.5 1.7 Alb
2 Austria 8.9 14.0 4.3 19.9 2.1 28.0 3.6 1.3 4.3 Aus
3 Belg._Luxem. 13.5 9.3 4.1 17.5 4.5 26.6 5.7 2.1 4.0 Bel
4 Bulgaria 7.8 6.0 1.6 8.3 1.2 56.7 1.1 3.7 4.2 Bul
5 Czechoslovakia 9.7 11.4 2.8 12.5 2.0 34.3 5.0 1.1 4.0 Cze
6 Denmark 10.6 10.8 3.7 25.0 9.9 21.9 4.8 0.7 2.4 Den
7 East_Germany 8.4 11.6 3.7 11.1 5.4 24.6 6.5 0.8 3.6 Eas
8 Finland 9.5 4.9 2.7 33.7 5.8 26.3 5.1 1.0 1.4 Fin
9 France 18.0 9.9 3.3 19.5 5.7 28.1 4.8 2.4 6.5 Fra
10 Greece 10.2 3.0 2.8 17.6 5.9 41.7 2.2 7.8 6.5 Gre
11 Hungary 5.3 12.4 2.9 9.7 0.3 40.1 4.0 5.4 4.2 Hun
12 Ireland 13.9 10.0 4.7 25.8 2.2 24.0 6.2 1.6 2.9 Ire
13 Italy 9.0 5.1 2.9 13.7 3.4 36.8 2.1 4.3 6.7 Ita
14 Netherlands 9.5 13.6 3.6 23.4 2.5 22.4 4.2 1.8 3.7 Net
15 Norway 9.4 4.7 2.7 23.3 9.7 23.0 4.6 1.6 2.7 Nor
16 Poland 6.9 10.2 2.7 19.3 3.0 36.1 5.9 2.0 6.6 Pol
17 Portugal 6.2 3.7 1.1 4.9 14.2 27.0 5.9 4.7 7.9 Por
18 Rumania 6.2 6.3 1.5 11.1 1.0 49.6 3.1 5.3 2.8 Rum
19 Spain 7.1 3.4 3.1 8.6 7.0 29.2 5.7 5.9 7.2 Spa
20 Sweden 9.9 7.8 3.5 24.7 7.5 19.5 3.7 1.4 2.0 Swe
21 Switzerland 13.1 10.1 3.1 23.8 2.3 25.6 2.8 2.4 4.9 Swi
22 USSR 9.3 4.6 2.1 16.6 3.0 43.6 6.4 3.4 2.9 USS
23 United_Kingdom 17.4 5.7 4.7 20.6 4.3 24.3 4.7 3.4 3.3 Uni
24 West_Germany 11.4 12.5 4.1 18.8 3.4 18.6 5.2 1.5 3.8 Wes
25 Yugoslavia 4.4 5.0 1.2 9.5 0.6 55.9 3.0 5.7 3.2 Yug

 


 
European Protein Consumption
PCA on the Protein Variables

The PRINCOMP Procedure

Observations 25
Variables 9
 
Simple Statistics
  Meat Pigpl Eggs Milk Fish Cereal Starch Nuts Fruveg
Mean 9.828000000 7.896000000 2.936000000 17.11200000 4.284000000 32.24800000 4.276000000 3.072000000 4.136000000
StD 3.347078328 3.694080851 1.117616511 7.10541577 3.402533370 10.97478625 1.634084861 1.985682083 1.803903176
 
Covariance Matrix
  Meat Pigpl Eggs Milk Fish Cereal Starch Nuts Fruveg
Meat 11.2029333 1.8917833 2.1906167 11.9609000 0.6942167 -18.3622333 0.7407000 -2.3225167 -0.4481333
Pigpl 1.8917833 13.6462333 2.5614000 7.3883833 -2.9413167 -16.7760500 1.8940667 -4.6576167 -0.4086000
Eggs 2.1906167 2.5614000 1.2490667 4.5703833 0.2493500 -8.7384667 0.8259000 -1.2422833 -0.0917667
Milk 11.9609000 7.3883833 4.5703833 50.4869333 3.3335333 -46.2218500 2.5823833 -8.7629833 -5.2342000
Fish 0.6942167 -2.9413167 0.2493500 3.3335333 11.5772333 -19.5758667 2.2454333 -0.9942167 1.6335167
Cereal -18.3622333 -16.7760500 -8.7384667 -46.2218500 -19.5758667 120.4459333 -9.5633833 14.1868167 0.9215333
Starch 0.7407000 1.8940667 0.8259000 2.5823833 2.2454333 -9.5633833 2.6702333 -1.5390333 0.2488167
Nuts -2.3225167 -4.6576167 -1.2422833 -8.7629833 -0.9942167 14.1868167 -1.5390333 3.9429333 1.3431333
Fruveg -0.4481333 -0.4086000 -0.0917667 -5.2342000 1.6335167 0.9215333 0.2488167 1.3431333 3.2540667
 
Total Variance 218.47556667
 
Eigenvalues of the Covariance Matrix
  Eigenvalue Difference Proportion Cumulative
1 155.233995 124.535258 0.7105 0.7105
2 30.698737 15.055280 0.1405 0.8510
3 15.643457 7.326526 0.0716 0.9226
4 8.316930 4.687009 0.0381 0.9607
5 3.629922 1.201130 0.0166 0.9773
6 2.428791 0.871993 0.0111 0.9884
7 1.556798 0.842000 0.0071 0.9956
8 0.714799 0.462660 0.0033 0.9988
9 0.252139   0.0012 1.0000
 
Eigenvectors
  Prin1 Prin2 Prin3 Prin4 Prin5 Prin6 Prin7 Prin8 Prin9
Meat -.150654 0.132695 0.031837 0.896051 0.288803 -.229066 -.079658 0.065941 -.095932
Pigpl -.129489 0.043435 -.798384 -.185340 0.399751 0.000044 -.246275 0.260528 -.145780
Eggs -.067271 0.020946 -.098093 0.075836 0.078775 0.069341 0.141591 0.255310 0.939903
Milk -.425376 0.830856 0.219644 -.203645 0.117006 0.149439 0.036591 0.011455 -.039062
Fish -.126976 -.292307 0.522388 -.285108 0.558874 -.256157 -.335867 0.228545 0.035522
Cereal 0.860865 0.406169 0.038196 -.034500 0.274384 -.113864 -.018972 0.035042 0.038075
Starch -.066851 -.076049 -.034323 -.111867 0.240940 -.327019 0.871248 0.155131 -.167101
Nuts 0.113909 -.070066 0.166391 0.122902 -.084102 0.564564 0.083254 0.743500 -.231940
Fruveg 0.020235 -.169221 0.022178 0.073645 0.533053 0.645085 0.180124 -.481814 0.013067