/* Display Manager command string: switch to the Output window and clear it, */
/* then switch to the Log window and clear it, so each run starts clean.     */
dm "output;clear;log;clear";
****************************************************************;
* DistanceExample.sas *;
* Example showing use of distance macro and then subsequent *;
* use of the distance output by procedures CLUSTER and MDS. *;
* *;
* The program assumes that all macros listed below are found *;
* in the current working directory. You could also move them *;
* to a macro autocall directory and comment out the include *;
* lines so that they do not execute. *;
* *;
* The data set is taken from the DISTANCE macro documentation. *;
****************************************************************;
/* Listing layout: 55 lines per page, 80 columns per line, page numbering  */
/* restarted at 1, no date stamp, and plain-ASCII table drawing characters. */
options pagesize=55
        linesize=80
        pageno=1
        nodate
        formchar='|----|+|---+=|-/\<>*';

/* First title line for all output produced by this program. */
title1 "Distance Macro Examples: Mammals Teeth";
/*
* Include SAS macros:
* (These are assumed to reside in the current working directory.
* Include an appropriate path as necessary.)
*/
%include "xmacro.sas" / NoSource; /* Needed by STDIZE and DISTANCE */
%include "stdize.sas" / NoSource; /* Needed by DISTANCE */
%include "distnew.sas" / NoSource; /* DISTANCE macro */
%include "dendro.sas" / NoSource; /* Plots a dendrogram */
%include "plotit7.sas" / NoSource; /* Plots scaling results */
/*
 * Example data set from the DISTANCE macro documentation.
 *
 * Record layout (fixed columns -- the INPUT statement depends on it):
 *   columns  1-16  Mammal   name of the animal
 *   columns 21-28  V1-V8    eight single-digit tooth counts, one per column
 *
 * The tooth counts MUST start in column 21 because of the @21 pointer
 * control.  If the blanks between the name and the digits are collapsed,
 * Mammal absorbs some of the digits and V1-V8 are read from past the end
 * of the record.
 *
 * Labels are quoted so that the label text is never parsed as further
 * variable=label pairs.
 */
Data Teeth;
   Input Mammal $ 1-16 @21 (V1-V8) (1.);
   Label V1='Top incisors'
         V2='Bottom incisors'
         V3='Top canines'
         V4='Bottom canines'
         V5='Top premolars'
         V6='Bottom premolars'
         V7='Top molars'
         V8='Bottom molars';
/* Column ruler for the records below:
----+----1----+----2----+---
*/
   Datalines;
Armadillo           00000088
Mouse               11000033
Beaver              11002133
Groundhog           11002133
Rabbit              21003233
Moose               04003333
Mole                32103333
Wolf                33114423
Raccoon             33114432
Jaguar              33113211
;
/*
 * Compute Euclidean distances between the mammals with the DISTANCE macro.
 *   data/var/id  : input data set, the eight tooth-count variables, and the
 *                  variable that names each observation
 *   method/std   : distance measure and standardization request
 *                  (NOTE(review): exact meaning of STD=STD is defined in
 *                  distnew.sas / stdize.sas -- confirm there)
 *   shape/options: full square matrix, printed, with the NOMISS option
 *   out          : distances are written to data set Euclid
 */
title2 "METHOD=EUCLIDEAN";
%distance(data=teeth,
          var=v1-v8,
          id=mammal,
          method=euclid,
          std=std,
          shape=square,
          options=print nomiss,
          out=Euclid
         );
/*
 * Inspect the data set written by the DISTANCE macro.
 * In the CONTENTS listing, look at "Data Set Type": it is DISTANCE,
 * which is what lets the CLUSTER and MDS procedures read the
 * data set directly as a distance matrix.
 */
proc contents data=Euclid;
run;

/* List the distance matrix itself, one row per mammal. */
proc print data=Euclid;
run;
/*
 * Single-linkage cluster analysis on the distances produced by the
 * DISTANCE macro.  The cluster history is written to the TREE data
 * set (OutTree=) so that the DENDRO macro can draw it afterwards.
 */
title3 "Single-linkage Cluster Analysis";
proc cluster data=Euclid method=single outtree=Tree;
   id  Mammal;               /* labels each row/cluster member       */
   var Armadillo--Jaguar;    /* the ten distance columns, in order   */
run;
/*
* Create a graphical tree of the cluster results.
* NOTE(review): the macro is invoked with no arguments, so it relies on
* whatever defaults dendro.sas defines -- presumably it picks up the
* most recently created data set (Tree, from the CLUSTER step above).
* Confirm against dendro.sas before reordering the steps.
*/
%dendro;
/*
 * Multidimensional scaling of the same distance matrix.
 * A two-dimensional solution is fitted with Level=Absolute; the data
 * matrix, the configuration and the final estimates are printed
 * (PData, PConfig, PFinal).  Results are saved in MDSout for plotting.
 */
title3 "Metric Multidimensional Scaling";
proc mds data=Euclid
         level=absolute
         dimension=2
         out=MDSout
         pdata pconfig pfinal;
   id  Mammal;
   var Armadillo--Jaguar;
run;
/*
 * Draw the two-dimensional MDS solution stored in MDSout with the
 * PLOTIT macro (MDS data type, black monochrome output).
 */
%plotit(data=MDSout,
        datatype=mds,
        monochro=Black);
| Distance Macro Examples: Mammals Teeth |
| METHOD=EUCLIDEAN |
| Mammal | Armadillo | Mouse | Beaver | Groundhog | Rabbit | Moose | Mole | Wolf | Raccoon | Jaguar |
| Armadillo | 0.00000 | 4.05525 | 4.34184 | 4.34184 | 4.95602 | 5.77229 | 5.96370 | 7.26567 | 7.26567 | 7.43075 |
| Mouse | 4.05525 | 0.00000 | 1.55130 | 1.55130 | 2.61543 | 3.81139 | 3.91911 | 5.32308 | 5.32308 | 4.65184 |
| Beaver | 4.34184 | 1.55130 | 0.00000 | 0.00000 | 1.25596 | 2.89200 | 3.03255 | 4.39495 | 4.39495 | 4.04680 |
| Groundhog | 4.34184 | 1.55130 | 0.00000 | 0.00000 | 1.25596 | 2.89200 | 3.03255 | 4.39495 | 4.39495 | 4.04680 |
| Rabbit | 4.95602 | 2.61543 | 1.25596 | 1.25596 | 0.00000 | 2.90507 | 2.33288 | 3.69910 | 3.69910 | 3.67730 |
| Moose | 5.77229 | 3.81139 | 2.89200 | 2.89200 | 2.90507 | 0.00000 | 3.45120 | 3.95318 | 3.95318 | 4.15534 |
| Mole | 5.96370 | 3.91911 | 3.03255 | 3.03255 | 2.33288 | 3.45120 | 0.00000 | 2.47647 | 2.47647 | 2.78786 |
| Wolf | 7.26567 | 5.32308 | 4.39495 | 4.39495 | 3.69910 | 3.95318 | 2.47647 | 0.00000 | 0.77981 | 1.95177 |
| Raccoon | 7.26567 | 5.32308 | 4.39495 | 4.39495 | 3.69910 | 3.95318 | 2.47647 | 0.77981 | 0.00000 | 1.95177 |
| Jaguar | 7.43075 | 4.65184 | 4.04680 | 4.04680 | 3.67730 | 4.15534 | 2.78786 | 1.95177 | 1.95177 | 0.00000 |
| Distance Macro Examples: Mammals Teeth |
| METHOD=EUCLIDEAN |
| The CONTENTS Procedure |
| Data Set Name: | WORK.EUCLID | Observations: | 10 |
| Member Type: | DATA | Variables: | 11 |
| Engine: | V8 | Indexes: | 0 |
| Created: | 16:18 Tuesday, November 27, 2001 | Observation Length: | 96 |
| Last Modified: | 16:18 Tuesday, November 27, 2001 | Deleted Observations: | 0 |
| Protection: | | Compressed: | NO |
| Data Set Type: | DISTANCE | Sorted: | NO |
| Label: |
| -----Engine/Host Dependent Information----- | |
| Data Set Page Size: | 8192 |
| Number of Data Set Pages: | 1 |
| First Data Page: | 1 |
| Max Obs per Page: | 84 |
| Obs in First Data Page: | 10 |
| Number of Data Set Repairs: | 0 |
| File Name: | C:\WINDOWS\TEMP\SAS Temporary Files\_TD28169\euclid.sas7bdat |
| Release Created: | 8.0202M0 |
| Host Created: | WIN_98 |
| -----Alphabetic List of Variables and Attributes----- | ||||
| # | Variable | Type | Len | Pos |
| 2 | Armadillo | Num | 8 | 0 |
| 4 | Beaver | Num | 8 | 16 |
| 5 | Groundhog | Num | 8 | 24 |
| 11 | Jaguar | Num | 8 | 72 |
| 1 | Mammal | Char | 16 | 80 |
| 8 | Mole | Num | 8 | 48 |
| 7 | Moose | Num | 8 | 40 |
| 3 | Mouse | Num | 8 | 8 |
| 6 | Rabbit | Num | 8 | 32 |
| 10 | Raccoon | Num | 8 | 64 |
| 9 | Wolf | Num | 8 | 56 |
| Distance Macro Examples: Mammals Teeth |
| METHOD=EUCLIDEAN |
| Obs | Mammal | Armadillo | Mouse | Beaver | Groundhog | Rabbit | Moose | Mole | Wolf | Raccoon | Jaguar |
| 1 | Armadillo | 0.00000 | 4.05525 | 4.34184 | 4.34184 | 4.95602 | 5.77229 | 5.96370 | 7.26567 | 7.26567 | 7.43075 |
| 2 | Mouse | 4.05525 | 0.00000 | 1.55130 | 1.55130 | 2.61543 | 3.81139 | 3.91911 | 5.32308 | 5.32308 | 4.65184 |
| 3 | Beaver | 4.34184 | 1.55130 | 0.00000 | 0.00000 | 1.25596 | 2.89200 | 3.03255 | 4.39495 | 4.39495 | 4.04680 |
| 4 | Groundhog | 4.34184 | 1.55130 | 0.00000 | 0.00000 | 1.25596 | 2.89200 | 3.03255 | 4.39495 | 4.39495 | 4.04680 |
| 5 | Rabbit | 4.95602 | 2.61543 | 1.25596 | 1.25596 | 0.00000 | 2.90507 | 2.33288 | 3.69910 | 3.69910 | 3.67730 |
| 6 | Moose | 5.77229 | 3.81139 | 2.89200 | 2.89200 | 2.90507 | 0.00000 | 3.45120 | 3.95318 | 3.95318 | 4.15534 |
| 7 | Mole | 5.96370 | 3.91911 | 3.03255 | 3.03255 | 2.33288 | 3.45120 | 0.00000 | 2.47647 | 2.47647 | 2.78786 |
| 8 | Wolf | 7.26567 | 5.32308 | 4.39495 | 4.39495 | 3.69910 | 3.95318 | 2.47647 | 0.00000 | 0.77981 | 1.95177 |
| 9 | Raccoon | 7.26567 | 5.32308 | 4.39495 | 4.39495 | 3.69910 | 3.95318 | 2.47647 | 0.77981 | 0.00000 | 1.95177 |
| 10 | Jaguar | 7.43075 | 4.65184 | 4.04680 | 4.04680 | 3.67730 | 4.15534 | 2.78786 | 1.95177 | 1.95177 | 0.00000 |
| Distance Macro Examples: Mammals Teeth |
| METHOD=EUCLIDEAN |
| Single-linkage Cluster Analysis |
| The CLUSTER Procedure |
| Single Linkage Cluster Analysis |
| Mean Distance Between Observations = 3.653877 |
| Cluster History | |||||
| NCL | Clusters Joined | | FREQ | Norm Min Dist | Tie |
| 9 | Beaver | Groundhog | 2 | 0 | |
| 8 | Wolf | Raccoon | 2 | 0.2134 | |
| 7 | CL9 | Rabbit | 3 | 0.3437 | |
| 6 | Mouse | CL7 | 4 | 0.4246 | |
| 5 | CL8 | Jaguar | 3 | 0.5342 | |
| 4 | CL6 | Mole | 5 | 0.6385 | |
| 3 | CL4 | CL5 | 8 | 0.6778 | |
| 2 | CL3 | Moose | 9 | 0.7915 | |
| 1 | Armadillo | CL2 | 10 | 1.1098 | |
| Distance Macro Examples: Mammals Teeth |
| METHOD=EUCLIDEAN |
| Metric Multidimensional Scaling |
| Multidimensional Scaling: Data=WORK.EUCLID.DATA |
| Data Matrix | ||||||||||
| 1 | Armadillo | Mouse | Beaver | Groundhog | Rabbit | Moose | Mole | Wolf | Raccoon | Jaguar |
| Armadillo | 0 | 4.055247 | 4.341837 | 4.341837 | 4.956016 | 5.772288 | 5.963696 | 7.265674 | 7.265674 | 7.430754 |
| Mouse | 4.055247 | 0 | 1.551297 | 1.551297 | 2.61543 | 3.811393 | 3.919106 | 5.323078 | 5.323078 | 4.65184 |
| Beaver | 4.341837 | 1.551297 | 0 | 0 | 1.255957 | 2.892001 | 3.032548 | 4.39495 | 4.39495 | 4.046798 |
| Groundhog | 4.341837 | 1.551297 | 0 | 0 | 1.255957 | 2.892001 | 3.032548 | 4.39495 | 4.39495 | 4.046798 |
| Rabbit | 4.956016 | 2.61543 | 1.255957 | 1.255957 | 0 | 2.90507 | 2.332879 | 3.699095 | 3.699095 | 3.677302 |
| Moose | 5.772288 | 3.811393 | 2.892001 | 2.892001 | 2.90507 | 0 | 3.451201 | 3.953177 | 3.953177 | 4.15534 |
| Mole | 5.963696 | 3.919106 | 3.032548 | 3.032548 | 2.332879 | 3.451201 | 0 | 2.476474 | 2.476474 | 2.787862 |
| Wolf | 7.265674 | 5.323078 | 4.39495 | 4.39495 | 3.699095 | 3.953177 | 2.476474 | 0 | 0.779813 | 1.951769 |
| Raccoon | 7.265674 | 5.323078 | 4.39495 | 4.39495 | 3.699095 | 3.953177 | 2.476474 | 0.779813 | 0 | 1.951769 |
| Jaguar | 7.430754 | 4.65184 | 4.046798 | 4.046798 | 3.677302 | 4.15534 | 2.787862 | 1.951769 | 1.951769 | 0 |
| Distance Macro Examples: Mammals Teeth |
| METHOD=EUCLIDEAN |
| Metric Multidimensional Scaling |
| Multidimensional Scaling: Data=WORK.EUCLID.DATA |
| Shape=TRIANGLE Condition=MATRIX Level=ABSOLUTE |
| Coef=IDENTITY Dimension=2 Formula=1 Fit=1 |
| Gconverge=0.01 Maxiter=100 Over=1 Ridge=0.0001 |
| Iteration | Type | Badness-of-Fit Criterion | Change in Criterion | Convergence Measure |
| 0 | Initial | 0.1796 | . | 0.7946 |
| 1 | Lev-Mar | 0.1007 | 0.0789 | 0.3066 |
| 2 | Gau-New | 0.0951 | 0.005573 | 0.0944 |
| 3 | Gau-New | 0.0945 | 0.000654 | 0.0525 |
| 4 | Gau-New | 0.0943 | 0.000207 | 0.0310 |
| 5 | Gau-New | 0.0942 | 0.0000723 | 0.0184 |
| 6 | Gau-New | 0.0942 | 0.0000254 | 0.0108 |
| 7 | Gau-New | 0.0942 | 8.7399E-6 | 0.006309 |
| Convergence criterion is satisfied. |
| Configuration | ||
| | Dim1 | Dim2 |
| Armadillo | 4.61 | -1.79 |
| Mouse | 2.33 | 1.17 |
| Beaver | 1.23 | 0.57 |
| Groundhog | 1.23 | 0.57 |
| Rabbit | 0.48 | -0.14 |
| Moose | -0.46 | 2.50 |
| Mole | -0.86 | -1.42 |
| Wolf | -2.78 | -1.22 |
| Raccoon | -2.92 | -0.81 |
| Jaguar | -2.86 | 0.58 |