Discrete Data Examples

/* Display-manager command: clear the Output and Log windows. */
dm "output;clear;log;clear";
/*
 * DiscreteData.sas
 *
 * Generates Poisson and negative binomial samples and analyses
 * them, then works two examples from Ludwig and Reynolds with
 * goodness-of-fit statistics.
 */

/* Listing layout: page size, line size, page numbering, no date, plain-text box characters. */
options
   ps=55
   ls=80
   pageno=1
   nodate
   formchar='|----|+|---+=|-/\<>*';

/* Graphics: GIF device, 6 in x 6 in, Swiss font for text and titles. */
goptions
   device=gif transparency noborder noprompt
   vsize=6 in
   hsize=6 in
   htext=1  ftext=Swiss
   htitle=1 ftitle=Swiss;

title1 "Discrete Data Models";

title2 "Poisson Data";
/*
 * Simulate N=100 Poisson counts with mean lambda=16.
 * Seed is 0, so SAS takes the starting seed from the clock
 * and the sample differs from run to run.
 * Only the simulated count Y is written to the data set.
 */
data Poi;
   retain Seed   0
          lambda 16
          N      100;
   do draw = 1 to N;
      Y = ranpoi(Seed, lambda);
      output;
   end;
   keep Y;
run;

/*
 * Vertical bar chart of the simulated Poisson counts.
 * The DISCRETE variant (one bar per distinct value) is left
 * here, disabled, as an alternative.
 */
proc gchart data=Poi;
   /* vbar Y / discrete; */
   vbar Y;
run;
quit;

/*
 * Intercept-only Poisson regression with a log link.
 * The intercept is ln(mean), so the ESTIMATE statement with
 * the EXP option reports the estimated population mean.
 * For Poisson data variance=mean, so no extra-dispersion
 * is expected in this fit.
 */

proc genmod data=Poi;
   model Y = / dist=poisson link=log lrci maxiter=50;
   estimate "Population Mean" intercept 1 / exp;
run;
quit;

/*
 * Same Poisson fit, but with DSCALE so that the scale
 * parameter is estimated from the deviance
 * (square root of DEVIANCE/DOF) instead of being held at 1.
 */
title3 "Consider an Overdispersed Model";
proc genmod data=Poi;
   model Y = / dist=poisson link=log dscale lrci maxiter=50;
   estimate "Population Mean" intercept 1 / exp;
run;
quit;

/*
 * Fit the negative binomial model to the Poisson sample.
 * This step belongs to the "Poisson Data" section and must
 * read WORK.POI: data set NB is only created further down,
 * so referring to it here fails in a fresh session.
 * NOTE(review): because the sample is Poisson, the estimated
 * dispersion is expected to be close to zero and GENMOD may
 * report a boundary/convergence note -- confirm on a run.
 */
Title3 "Fit the Negative Binomial Model";
Proc Genmod Data=Poi;
 Model Y = / Dist=Negbin Link=Log LRCI MaxIter=50;
 Estimate "Population Mean" Intercept 1 / Exp;
Run;
Quit;


title2 "Negative Binomial Data";
data NB;
   /*
    * Simulate N=100 overdispersed counts as a gamma mixture
    * of Poissons: draw X from RanGam(Seed,r), then draw Y
    * from a Poisson with rate X/p.
    *
    * Mu and Sigma2 are the NB(r,p) mean r(1-p)/p and
    * variance r(1-p)/p**2; k=(Sigma2-Mu)/Mu**2 is the
    * dispersion in V(Y) = mu + k*mu**2. These constants are
    * written to the log for comparison with the fits.
    *
    * NOTE(review): with rate X/p the mean of Y works out to
    * r/p = Mu + r (the value printed as MuPlusR), not Mu --
    * confirm which parameterisation is intended.
    */
   retain Seed 0
          r    8
          p    0.5
          N    100;
   OneMinusP = 1 - p;
   Mu        = r*OneMinusP/p;
   MuPlusR   = Mu + r;
   Sigma2    = r*OneMinusP/p**2;
   k         = (Sigma2 - Mu)/Mu**2;
   put Mu= Sigma2= MuPlusR= k=;
   do draw = 1 to N;
      X      = rangam(Seed, r);
      lambda = X/p;
      Y      = ranpoi(Seed, lambda);
      output;
   end;
   keep X Y;
run;

/*
 * Vertical bar chart of the simulated mixture counts.
 * The DISCRETE variant is left here, disabled.
 */
proc gchart data=NB;
   /* vbar Y / discrete; */
   vbar Y;
run;
quit;

/*
 * Intercept-only negative binomial fit with a log link.
 * GENMOD models the mean of the response, so exp(intercept)
 * estimates Mu+r in the notation of the generating step.
 * The reported dispersion parameter is the k in
 * V(Y) = mu + k*mu**2
 */

proc genmod data=NB;
   model Y = / dist=negbin link=log lrci maxiter=50;
   estimate "Population Mean" intercept 1 / exp;
run;
quit;

/*
 * Fit the same sample as a Poisson model with DSCALE, so the
 * extra variation is absorbed by a scale parameter estimated
 * from the deviance rather than by a dispersion parameter.
 */
title3 "Treat as Overdispersed Poisson Model";
proc genmod data=NB;
   model Y = / dist=poisson link=log dscale lrci maxiter=50;
   estimate "Population Mean" intercept 1 / exp;
run;
quit;


/*
 * Example: Carpenter Bee Larvae in soap-tree yucca plants
 * (Ludwig and Reynolds 1988, pages 29-35).
 *
 * The data arrive as a frequency table (count Y, number of
 * plants with that count). Some procedures do not handle
 * every aspect of summarized data properly, so each table
 * row is expanded into Frequency individual observations.
 * A row with Frequency 0 therefore contributes no records.
 */
title2 "Carpenter Bee Larvae Counts in Soap-tree Yucca";
data CarpenterBees;
   input Y Frequency;
   do copy = 1 to Frequency;
      output;
   end;
   keep Y;
datalines;
 0 114
 1  25
 2  15
 3  10
 4   6
 5   5
 6   2
 7   1
 8   1
 9   0
10   1
;

/*
 * Reproduce the original frequency table from the
 * expanded observations.
 */
proc freq data=CarpenterBees;
   table Y;
run;

/*
 * Histogram of the counts, one bar per distinct value.
 */
proc gchart data=CarpenterBees;
   vbar Y / discrete;
run;

/*
 * Moments, quantiles and extreme values of the counts.
 */
proc univariate data=CarpenterBees;
   var Y;
run;

/*
 * Poisson fit to the larvae counts (intercept only, log link).
 * The parameter estimates are captured in data set Parms so
 * the fitted probabilities can be computed from them.
 */
title3 "Poisson Model";
proc genmod data=CarpenterBees;
   model Y = / dist=poisson link=log lrci;
   estimate "Population Mean" intercept 1 / exp;
   ods output ParameterEstimates=Parms;
run;

/*
 * Fitted Poisson probabilities for Y=0..10, for use in the
 * goodness-of-fit test that follows.
 *   Prob        P(Y=y) under the fitted Poisson
 *   Expected    expected count out of the 180 observations
 *   Cummulative running total of Prob, i.e. P(Y<=y)
 *   InvCum      upper tail including y, i.e. P(Y>=y)
 * The step reads only the first row of Parms (the intercept,
 * which is ln(lambda)) and stops after one pass.
 */
data Expected;
   if _n_ = 1 then
      do;
         set Parms;
         Lambda       = exp(Estimate);
         ExpNegLambda = exp(-Lambda);
         retain Lambda ExpNegLambda;
      end;
   do Y = 0 to 10;
      Prob     = (Lambda**Y)*ExpNegLambda/gamma(Y+1);
      Expected = 180*Prob;
      Cummulative + Prob;
      InvCum   = 1 - Cummulative + Prob;
      output;
   end;
   stop;
   keep Y Prob Expected Lambda Cummulative InvCum;
run;
title4 "Expected Probabilities";
proc print data=Expected;
run;
 
/*
 * PROC FREQ can carry out the goodness-of-fit test, but its
 * d.f. are not correct. Some expected counts fall below 1,
 * so all values Y>=4 are pooled into a single "4+" cell.
 */
proc format;
   value YGroup 4-high = "4+";
run;

/*
 * The table has 5 cells, so PROC FREQ reports d.f.=5-1=4.
 * Lambda was estimated from these same data, which costs
 * one further d.f.: the correct value is 5-1-1=3.
 *
 * The TESTP percentages are the fitted Poisson values:
 * 100*Prob for Y=0..3 and 100*InvCum at Y=4 for the
 * pooled cell.
 */
title4 "Pearson Chi-square Goodness-of-fit Test";
title5 "Note: Degrees of Freedom Should Be 3";
proc freq data=CarpenterBees;
   format Y YGroup.;
   table Y / chisq nocum
             testp=(38.674 36.740 17.452 5.526 1.607);
run;

/*
 * Negative binomial fit to the same counts. Parms is
 * overwritten with this model's estimates (intercept and
 * dispersion). MaxIter is raised to 500 for this fit.
 */
title3 "Negative Binomial Model";
proc genmod data=CarpenterBees;
   model Y = / dist=negbin link=log lrci maxiter=500;
   estimate "Population Mean" intercept 1 / exp;
   ods output ParameterEstimates=Parms;
run;

/*
 * Fitted negative binomial probabilities for Y=0..10.
 * Row 1 of Parms holds ln(Mu) and row 2 the dispersion k;
 * both are fetched by direct access (POINT=).
 *   kinv        1/k
 *   VarY        Mu + k*Mu**2
 *   Prob        P(Y=y) under the fitted model
 *   Expected    expected count out of the 180 observations
 *   Cummulative running total of Prob, i.e. P(Y<=y)
 *   InvCum      upper tail including y, i.e. P(Y>=y)
 */
data Expected;
   if _n_ = 1 then
      do;
         row = 1;
         set Parms point=row nobs=nRows;
         Mu = exp(Estimate);
         row = 2;
         set Parms point=row nobs=nRows;
         k    = Estimate;
         kinv = 1/k;
         VarY = Mu + k*Mu**2;
         retain Mu k VarY kinv;
      end;
   do Y = 0 to 10;
      /* Negative binomial probability mass at Y */
      Prob = gamma(Y+kinv)/(gamma(Y+1)*gamma(kinv))
             *(k*Mu)**Y/((1+k*Mu)**(Y+kinv));
      Expected = 180*Prob;
      Cummulative + Prob;
      InvCum = 1 - Cummulative + Prob;
      output;
   end;
   stop;
   keep Y Prob Expected Mu k kinv VarY Cummulative InvCum;
run;
title4 "Expected Probabilities";
proc print data=Expected;
run;

/*
 * For the negative binomial fit, pool Y>=5 into a "5+" cell.
 */
proc format;
   value YGroup 5-high = "5+";
run;

/*
 * Six cells give d.f.=6-1=5 in PROC FREQ. Two parameters
 * (mean and dispersion) were estimated from the data, so
 * the correct value is 6-1-2=3.
 *
 * The TESTP percentages are 100*Prob for Y=0..4 and
 * 100*InvCum at Y=5 for the pooled cell.
 */
title4 "Pearson Chi-square Goodness-of-fit Test";
title5 "Note: Degrees of Freedom Should Be 3";
proc freq data=CarpenterBees;
   format Y YGroup.;
   table Y / chisq nocum
             testp=(62.617 16.530 8.150 4.641 2.820 5.243);
run;

/*
 * Example: Mites on Apple Leaves
 * (Ludwig and Reynolds 1988, pages 37-38).
 *
 * Each row of the frequency table (count Y, number of
 * leaves) is expanded into Frequency individual records.
 */

title2 "Mites on Apple Leaves";
data AppleLeaves;
   input Y Frequency;
   do copy = 1 to Frequency;
      output;
   end;
   keep Y;
datalines;
0 70
1 38
2 17
3 10
4  9
5  3
6  2
7  1
;

/*
 * Reproduce the original frequency table from the
 * expanded observations.
 */
proc freq data=AppleLeaves;
   table Y;
run;

/*
 * Histogram of the counts, one bar per distinct value.
 */
proc gchart data=AppleLeaves;
   vbar Y / discrete;
run;

/*
 * Moments, quantiles and extreme values of the counts.
 */
proc univariate data=AppleLeaves;
   var Y;
run;

/*
 * Poisson fit to the mite counts (intercept only, log link).
 * The parameter estimates are captured in data set Parms so
 * the fitted probabilities can be computed from them.
 */
title3 "Poisson Model";
proc genmod data=AppleLeaves;
   model Y = / dist=poisson link=log lrci;
   estimate "Population Mean" intercept 1 / exp;
   ods output ParameterEstimates=Parms;
run;

/*
 * Fitted Poisson probabilities for Y=0..10, for use in the
 * goodness-of-fit test that follows.
 *   Prob        P(Y=y) under the fitted Poisson
 *   Expected    expected count out of the 150 observations
 *   Cummulative running total of Prob, i.e. P(Y<=y)
 *   InvCum      upper tail including y, i.e. P(Y>=y)
 * The step reads only the first row of Parms (the intercept,
 * which is ln(lambda)) and stops after one pass.
 */
data Expected;
   if _n_ = 1 then
      do;
         set Parms;
         Lambda       = exp(Estimate);
         ExpNegLambda = exp(-Lambda);
         retain Lambda ExpNegLambda;
      end;
   do Y = 0 to 10;
      Prob     = (Lambda**Y)*ExpNegLambda/gamma(Y+1);
      Expected = 150*Prob;
      Cummulative + Prob;
      InvCum   = 1 - Cummulative + Prob;
      output;
   end;
   stop;
   keep Y Prob Expected Lambda Cummulative InvCum;
run;
title4 "Expected Probabilities";
proc print data=Expected;
run;
 
/*
 * PROC FREQ can carry out the goodness-of-fit test, but its
 * d.f. are not correct. Some expected counts fall below 1,
 * so all values Y>=4 are pooled into a single "4+" cell.
 */
proc format;
   value YGroup 4-high = "4+";
run;

/*
 * The table has 5 cells, so PROC FREQ reports d.f.=5-1=4.
 * Lambda was estimated from these same data, which costs
 * one further d.f.: the correct value is 5-1-1=3.
 *
 * The TESTP percentages are the fitted Poisson values:
 * 100*Prob for Y=0..3 and 100*InvCum at Y=4 for the
 * pooled cell.
 */
title4 "Pearson Chi-square Goodness-of-fit Test";
title5 "Note: Degrees of Freedom Should Be 3";
proc freq data=AppleLeaves;
   format Y YGroup.;
   table Y / chisq nocum
             testp=(31.769 36.429 20.886 7.983 2.933);
run;

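/*
 * Repeat analysis using the Negative Binomial Model.
 * NOTE(review): an earlier note here said that, because the
 * numerical algorithm would not properly converge for these
 * data, the INTERCEPT parameter was fixed at LN(MU)=0.1369.
 * The MODEL statement below contains no option that fixes
 * the intercept (it only raises MaxIter to 500), so the
 * intercept is estimated; confirm which is intended.
 */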
title3 "Negative Binomial Model";
/* Intercept-only negative binomial fit; estimates are saved to Parms. */
proc genmod data=AppleLeaves;
   model Y = / dist=negbin link=log lrci maxiter=500;
   estimate "Population Mean" intercept 1 / exp;
   ods output ParameterEstimates=Parms;
run;

/*
 * Fitted negative binomial probabilities for Y=0..10.
 * Row 1 of Parms holds ln(Mu) and row 2 the dispersion k;
 * both are fetched by direct access (POINT=).
 *   kinv        1/k
 *   VarY        Mu + k*Mu**2
 *   Prob        P(Y=y) under the fitted model
 *   Expected    expected count out of the 150 observations
 *   Cummulative running total of Prob, i.e. P(Y<=y)
 *   InvCum      upper tail including y, i.e. P(Y>=y)
 *
 * The KEEP list names only variables that exist in this
 * step. Frequency belongs to the raw input table and is
 * never created here, so listing it only produced a
 * "never been referenced" warning in the log.
 */
Data Expected;
 If _N_=1 Then
  Do;
   i=1;
   Set Parms Point=i Nobs=Nobs;
   Mu=Exp(Estimate); /* First obs is ln(Mu) */
   i=2;
   Set Parms Point=i Nobs=Nobs;
   k=Estimate;    /* Second obs is dispersion parameter */
   kinv=1/k;
   VarY=Mu+k*Mu**2;
   Retain Mu k VarY kinv;
  End;
 Do Y=0 to 10;
  Prob=Gamma(Y+kinv)/(Gamma(Y+1)*Gamma(kinv))*(k*mu)**Y/((1+k*mu)**(Y+kinv)); /* Neg binomial Probability */
  Expected=150*Prob;
  Cummulative+Prob;
  InvCum=1-Cummulative+Prob;
  Output;
 End;
 Stop; /* one pass only: no input is read in the main loop */
 Keep Y Prob Expected Mu k kinv VarY Cummulative InvCum;
Run;
Title4 "Expected Probabilities";
Proc Print Data=Expected;
Run;

/*
 * For the negative binomial fit, pool Y>=5 into a "5+" cell.
 */
Proc Format;
 Value YGroup 5-High="5+";
Run;

/*
 * Six cells give d.f.=6-1=5 in PROC FREQ. Two parameters
 * (mean and dispersion) were estimated from the data, so
 * the correct value is 6-1-2=3.
 *
 * The TESTP percentages are the fitted negative binomial
 * values: 100*Prob for Y=0..4 and 100*InvCum at Y=5 for the
 * pooled cell. The Y=2 entry is 13.401 (fitted Prob
 * 0.13401); with it the six percentages sum to 100.
 */
Title4 "Pearson Chi-square Goodness-of-fit Test";
Title5 "Note: Degrees of Freedom Should Be 3";
Proc Freq Data=AppleLeaves;
 Table Y / Chisq NoCum TestP=(46.325 25.067 13.401 7.135 3.791 4.281);
 Format Y YGroup.;
Run;


 

 


 
Discrete Data Models
Poisson Data

The GENMOD Procedure

Model Information
Data Set WORK.POI
Distribution Poisson
Link Function Log
Dependent Variable Y
Observations Used 100
 
Parameter Information
Parameter Effect
Prm1 Intercept
 
Criteria For Assessing Goodness Of Fit
Criterion DF Value Value/DF
Deviance 99 95.3649 0.9633
Scaled Deviance 99 95.3649 0.9633
Pearson Chi-Square 99 92.9479 0.9389
Scaled Pearson X2 99 92.9479 0.9389
Log Likelihood   2657.2623  
 
Algorithm converged.
 
Analysis Of Parameter Estimates
Parameter DF Estimate Standard Error Likelihood Ratio 95%
Confidence Limits
Chi-Square Pr > ChiSq
Intercept 1 2.7311 0.0255 2.6807 2.7807 11449.6 <.0001
Scale 0 1.0000 0.0000 1.0000 1.0000    

NOTE: The scale parameter was held fixed.

 

Contrast Estimate Results
Label Estimate Standard Error Alpha Confidence Limits Chi-Square Pr > ChiSq
Population Mean 2.7311 0.0255 0.05 2.6811 2.7811 11450 <.0001
Exp(Population Mean) 15.3500 0.3918 0.05 14.6010 16.1374    

 


 
Discrete Data Models
Poisson Data
Consider an Overdispersed Model

The GENMOD Procedure

Model Information
Data Set WORK.POI
Distribution Poisson
Link Function Log
Dependent Variable Y
Observations Used 100
 
Parameter Information
Parameter Effect
Prm1 Intercept
 
Criteria For Assessing Goodness Of Fit
Criterion DF Value Value/DF
Deviance 99 95.3649 0.9633
Scaled Deviance 99 99.0000 1.0000
Pearson Chi-Square 99 92.9479 0.9389
Scaled Pearson X2 99 96.4908 0.9747
Log Likelihood   2758.5502  
 
Algorithm converged.
 
Analysis Of Parameter Estimates
Parameter DF Estimate Standard Error Likelihood Ratio 95%
Confidence Limits
Chi-Square Pr > ChiSq
Intercept 1 2.7311 0.0251 2.6816 2.7798 11886.0 <.0001
Scale 0 0.9815 0.0000 0.9815 0.9815    

NOTE: The scale parameter was estimated by the square root of DEVIANCE/DOF.

 

Contrast Estimate Results
Label Estimate Standard Error Alpha Confidence Limits Chi-Square Pr > ChiSq
Population Mean 2.7311 0.0251 0.05 2.6820 2.7802 11886 <.0001
Exp(Population Mean) 15.3500 0.3845 0.05 14.6145 16.1225    

 


 

 


 
Discrete Data Models
Negative Binomial Data

The GENMOD Procedure

Model Information
Data Set WORK.NB
Distribution Negative Binomial
Link Function Log
Dependent Variable Y
Observations Used 100
 
Parameter Information
Parameter Effect
Prm1 Intercept
 
Criteria For Assessing Goodness Of Fit
Criterion DF Value Value/DF
Deviance 99 102.7304 1.0377
Scaled Deviance 99 102.7304 1.0377
Pearson Chi-Square 99 98.2249 0.9922
Scaled Pearson X2 99 98.2249 0.9922
Log Likelihood   2648.3986  
 
Algorithm converged.
 
Analysis Of Parameter Estimates
Parameter DF Estimate Standard Error Likelihood Ratio 95%
Confidence Limits
Chi-Square Pr > ChiSq
Intercept 1 2.7180 0.0451 2.6292 2.8078 3627.96 <.0001
Dispersion 1 0.1376 0.0291 0.0895 0.2066    

NOTE: The negative binomial dispersion parameter was estimated by maximum likelihood.

 

Contrast Estimate Results
Label Estimate Standard Error Alpha Confidence Limits Chi-Square Pr > ChiSq
Population Mean 2.7180 0.0451 0.05 2.6296 2.8064 3628.0 <.0001
Exp(Population Mean) 15.1500 0.6836 0.05 13.8676 16.5510    

 


 
Discrete Data Models
Negative Binomial Data
Treat as Overdispersed Poisson Model

The GENMOD Procedure

Model Information
Data Set WORK.NB
Distribution Poisson
Link Function Log
Dependent Variable Y
Observations Used 100
 
Parameter Information
Parameter Effect
Prm1 Intercept
 
Criteria For Assessing Goodness Of Fit
Criterion DF Value Value/DF
Deviance 99 303.5469 3.0661
Scaled Deviance 99 99.0000 1.0000
Pearson Chi-Square 99 303.0198 3.0608
Scaled Pearson X2 99 98.8281 0.9983
Log Likelihood   848.8781  
 
Algorithm converged.
 
Analysis Of Parameter Estimates
Parameter DF Estimate Standard Error Likelihood Ratio 95%
Confidence Limits
Chi-Square Pr > ChiSq
Intercept 1 2.7180 0.0450 2.6285 2.8049 3650.24 <.0001
Scale 0 1.7510 0.0000 1.7510 1.7510    

NOTE: The scale parameter was estimated by the square root of DEVIANCE/DOF.

 

Contrast Estimate Results
Label Estimate Standard Error Alpha Confidence Limits Chi-Square Pr > ChiSq
Population Mean 2.7180 0.0450 0.05 2.6298 2.8062 3650.2 <.0001
Exp(Population Mean) 15.1500 0.6816 0.05 13.8714 16.5465    

 


 
Discrete Data Models
Carpenter Bee Larvae Counts in Soap-tree Yucca

The FREQ Procedure

Y Frequency Percent Cumulative
Frequency
Cumulative
Percent
0 114 63.33 114 63.33
1 25 13.89 139 77.22
2 15 8.33 154 85.56
3 10 5.56 164 91.11
4 6 3.33 170 94.44
5 5 2.78 175 97.22
6 2 1.11 177 98.33
7 1 0.56 178 98.89
8 1 0.56 179 99.44
10 1 0.56 180 100.00

 


 

 


 
Discrete Data Models
Carpenter Bee Larvae Counts in Soap-tree Yucca

The UNIVARIATE Procedure
Variable: Y

Moments
N 180 Sum Weights 180
Mean 0.95 Sum Observations 171
Std Deviation 1.70203624 Variance 2.89692737
Skewness 2.38180835 Kurtosis 6.56643091
Uncorrected SS 681 Corrected SS 518.55
Coeff Variation 179.16171 Std Error Mean 0.12686229
 
Basic Statistical Measures
Location Variability
Mean 0.950000 Std Deviation 1.70204
Median 0.000000 Variance 2.89693
Mode 0.000000 Range 10.00000
    Interquartile Range 1.00000
 
Tests for Location: Mu0=0
Test Statistic p Value
Student's t t 7.488435 Pr > |t| <.0001
Sign M 33 Pr >= |M| <.0001
Signed Rank S 1105.5 Pr >= |S| <.0001
 
Quantiles (Definition 5)
Quantile Estimate
100% Max 10
99% 8
95% 5
90% 3
75% Q3 1
50% Median 0
25% Q1 0
10% 0
5% 0
1% 0
0% Min 0
 
Extreme Observations
Lowest Highest
Value Obs Value Obs
0 114 6 176
0 113 6 177
0 112 7 178
0 111 8 179
0 110 10 180

 


 
Discrete Data Models
Carpenter Bee Larvae Counts in Soap-tree Yucca
Poisson Model

The GENMOD Procedure

Model Information
Data Set WORK.CARPENTERBEES
Distribution Poisson
Link Function Log
Dependent Variable Y
Observations Used 180
 
Parameter Information
Parameter Effect
Prm1 Intercept
 
Criteria For Assessing Goodness Of Fit
Criterion DF Value Value/DF
Deviance 179 421.6296 2.3555
Scaled Deviance 179 421.6296 2.3555
Pearson Chi-Square 179 545.8421 3.0494
Scaled Pearson X2 179 545.8421 3.0494
Log Likelihood   -179.7712  
 
Algorithm converged.
 
Analysis Of Parameter Estimates
Parameter DF Estimate Standard Error Likelihood Ratio 95%
Confidence Limits
Chi-Square Pr > ChiSq
Intercept 1 -0.0513 0.0765 -0.2050 0.0949 0.45 0.5024
Scale 0 1.0000 0.0000 1.0000 1.0000    

NOTE: The scale parameter was held fixed.

 

Contrast Estimate Results
Label Estimate Standard Error Alpha Confidence Limits Chi-Square Pr > ChiSq
Population Mean -0.0513 0.0765 0.05 -0.2012 0.0986 0.45 0.5024
Exp(Population Mean) 0.9500 0.0726 0.05 0.8178 1.1036    

 


 
Discrete Data Models
Carpenter Bee Larvae Counts in Soap-tree Yucca
Poisson Model
Expected Probabilities

Obs Lambda Y Prob Expected Cummulative InvCum
1 0.95000 0 0.38674 69.6134 0.38674 1.00000
2 0.95000 1 0.36740 66.1327 0.75414 0.61326
3 0.95000 2 0.17452 31.4130 0.92866 0.24586
4 0.95000 3 0.05526 9.9475 0.98393 0.07134
5 0.95000 4 0.01313 2.3625 0.99705 0.01607
6 0.95000 5 0.00249 0.4489 0.99954 0.00295
7 0.95000 6 0.00039 0.0711 0.99994 0.00046
8 0.95000 7 0.00005 0.0096 0.99999 0.00006
9 0.95000 8 0.00001 0.0011 1.00000 0.00001
10 0.95000 9 0.00000 0.0001 1.00000 0.00000
11 0.95000 10 0.00000 0.0000 1.00000 0.00000

 


 
Discrete Data Models
Carpenter Bee Larvae Counts in Soap-tree Yucca
Poisson Model
Pearson Chi-square Goodness-of-fit Test
Note: Degrees of Freedom Should Be 3

The FREQ Procedure

Y Frequency Percent Test
Percent
0 114 63.33 38.67
1 25 13.89 36.74
2 15 8.33 17.45
3 10 5.56 5.53
4+ 16 8.89 1.61
 
Chi-Square Test
for Specified Proportions
Chi-Square 121.8554
DF 4
Pr > ChiSq <.0001

Sample Size = 180

 


 
Discrete Data Models
Carpenter Bee Larvae Counts in Soap-tree Yucca
Negative Binomial Model

The GENMOD Procedure

Model Information
Data Set WORK.CARPENTERBEES
Distribution Negative Binomial
Link Function Log
Dependent Variable Y
Observations Used 180
 
Parameter Information
Parameter Effect
Prm1 Intercept
 
Criteria For Assessing Goodness Of Fit
Criterion DF Value Value/DF
Deviance 179 143.4229 0.8012
Scaled Deviance 179 143.4229 0.8012
Pearson Chi-Square 179 151.6760 0.8474
Scaled Pearson X2 179 151.6760 0.8474
Log Likelihood   -114.7281  
 
Algorithm converged.
 
Analysis Of Parameter Estimates
Parameter DF Estimate Standard Error Likelihood Ratio 95%
Confidence Limits
Chi-Square Pr > ChiSq
Intercept 1 -0.0513 0.1451 -0.3329 0.2433 0.13 0.7237
Dispersion 1 2.7355 0.5787 1.7886 4.1172    

NOTE: The negative binomial dispersion parameter was estimated by maximum likelihood.

 

Contrast Estimate Results
Label Estimate Standard Error Alpha Confidence Limits Chi-Square Pr > ChiSq
Population Mean -0.0513 0.1451 0.05 -0.3356 0.2330 0.13 0.7237
Exp(Population Mean) 0.9500 0.1378 0.05 0.7149 1.2624    

 


 
Discrete Data Models
Carpenter Bee Larvae Counts in Soap-tree Yucca
Negative Binomial Model
Expected Probabilities

Obs Mu k kinv VarY Y Prob Expected Cummulative InvCum
1 0.95000 2.73552 0.36556 3.41880 0 0.62617 112.711 0.62617 1.00000
2 0.95000 2.73552 0.36556 3.41880 1 0.16530 29.754 0.79147 0.37383
3 0.95000 2.73552 0.36556 3.41880 2 0.08150 14.670 0.87297 0.20853
4 0.95000 2.73552 0.36556 3.41880 3 0.04641 8.353 0.91938 0.12703
5 0.95000 2.73552 0.36556 3.41880 4 0.02820 5.075 0.94757 0.08062
6 0.95000 2.73552 0.36556 3.41880 5 0.01778 3.200 0.96535 0.05243
7 0.95000 2.73552 0.36556 3.41880 6 0.01148 2.066 0.97683 0.03465
8 0.95000 2.73552 0.36556 3.41880 7 0.00754 1.357 0.98437 0.02317
9 0.95000 2.73552 0.36556 3.41880 8 0.00501 0.902 0.98938 0.01563
10 0.95000 2.73552 0.36556 3.41880 9 0.00336 0.606 0.99275 0.01062
11 0.95000 2.73552 0.36556 3.41880 10 0.00228 0.410 0.99502 0.00725

 


 
Discrete Data Models
Carpenter Bee Larvae Counts in Soap-tree Yucca
Negative Binomial Model
Pearson Chi-square Goodness-of-fit Test
Note: Degrees of Freedom Should Be 3

The FREQ Procedure

Y Frequency Percent Test
Percent
0 114 63.33 62.62
1 25 13.89 16.53
2 15 8.33 8.15
3 10 5.56 4.64
4 6 3.33 2.82
5+ 10 5.56 5.24
 
Chi-Square Test
for Specified Proportions
Chi-Square 1.3079
DF 5
Pr > ChiSq 0.9341

Sample Size = 180

 


 
Discrete Data Models
Mites on Apple Leaves

The FREQ Procedure

Y Frequency Percent Cumulative
Frequency
Cumulative
Percent
0 70 46.67 70 46.67
1 38 25.33 108 72.00
2 17 11.33 125 83.33
3 10 6.67 135 90.00
4 9 6.00 144 96.00
5 3 2.00 147 98.00
6 2 1.33 149 99.33
7 1 0.67 150 100.00

 


 

 


 
Discrete Data Models
Mites on Apple Leaves

The UNIVARIATE Procedure
Variable: Y

Moments
N 150 Sum Weights 150
Mean 1.14666667 Sum Observations 172
Std Deviation 1.50786158 Variance 2.27364653
Skewness 1.54453863 Kurtosis 2.06063766
Uncorrected SS 536 Corrected SS 338.773333
Coeff Variation 131.499556 Std Error Mean 0.12311638
 
Basic Statistical Measures
Location Variability
Mean 1.146667 Std Deviation 1.50786
Median 1.000000 Variance 2.27365
Mode 0.000000 Range 7.00000
    Interquartile Range 2.00000
 
Tests for Location: Mu0=0
Test Statistic p Value
Student's t t 9.313681 Pr > |t| <.0001
Sign M 40 Pr >= |M| <.0001
Signed Rank S 1620 Pr >= |S| <.0001
 
Quantiles (Definition 5)
Quantile Estimate
100% Max 7.0
99% 6.0
95% 4.0
90% 3.5
75% Q3 2.0
50% Median 1.0
25% Q1 0.0
10% 0.0
5% 0.0
1% 0.0
0% Min 0.0
 
Extreme Observations
Lowest Highest
Value Obs Value Obs
0 70 5 146
0 69 5 147
0 68 6 148
0 67 6 149
0 66 7 150

 


 
Discrete Data Models
Mites on Apple Leaves
Poisson Model

The GENMOD Procedure

Model Information
Data Set WORK.APPLELEAVES
Distribution Poisson
Link Function Log
Dependent Variable Y
Observations Used 150
 
Parameter Information
Parameter Effect
Prm1 Intercept
 
Criteria For Assessing Goodness Of Fit
Criterion DF Value Value/DF
Deviance 149 284.3125 1.9081
Scaled Deviance 149 284.3125 1.9081
Pearson Chi-Square 149 295.4418 1.9828
Scaled Pearson X2 149 295.4418 1.9828
Log Likelihood   -148.4602  
 
Algorithm converged.
 
Analysis Of Parameter Estimates
Parameter DF Estimate Standard Error Likelihood Ratio 95%
Confidence Limits
Chi-Square Pr > ChiSq
Intercept 1 0.1369 0.0762 -0.0164 0.2827 3.22 0.0727
Scale 0 1.0000 0.0000 1.0000 1.0000    

NOTE: The scale parameter was held fixed.

 

Contrast Estimate Results
Label Estimate Standard Error Alpha Confidence Limits Chi-Square Pr > ChiSq
Population Mean 0.1369 0.0762 0.05 -0.0126 0.2863 3.22 0.0727
Exp(Population Mean) 1.1467 0.0874 0.05 0.9875 1.3315    

 


 
Discrete Data Models
Mites on Apple Leaves
Poisson Model
Expected Probabilities

Obs Lambda Y Prob Expected Cummulative InvCum
1 1.14667 0 0.31769 47.6541 0.31769 1.00000
2 1.14667 1 0.36429 54.6434 0.68198 0.68231
3 1.14667 2 0.20886 31.3289 0.89084 0.31802
4 1.14667 3 0.07983 11.9746 0.97067 0.10916
5 1.14667 4 0.02288 3.4327 0.99356 0.02933
6 1.14667 5 0.00525 0.7872 0.99881 0.00644
7 1.14667 6 0.00100 0.1504 0.99981 0.00119
8 1.14667 7 0.00016 0.0246 0.99997 0.00019
9 1.14667 8 0.00002 0.0035 1.00000 0.00003
10 1.14667 9 0.00000 0.0005 1.00000 0.00000
11 1.14667 10 0.00000 0.0001 1.00000 0.00000

 


 
Discrete Data Models
Mites on Apple Leaves
Poisson Model
Pearson Chi-square Goodness-of-fit Test
Note: Degrees of Freedom Should Be 3

The FREQ Procedure

Y Frequency Percent Test
Percent
0 70 46.67 31.77
1 38 25.33 36.43
2 17 11.33 20.89
3 10 6.67 7.98
4+ 15 10.00 2.93
 
Chi-Square Test
for Specified Proportions
Chi-Square 47.9694
DF 4
Pr > ChiSq <.0001

Sample Size = 150

 


 
Discrete Data Models
Mites on Apple Leaves
Negative Binomial Model

The GENMOD Procedure

Model Information
Data Set WORK.APPLELEAVES
Distribution Negative Binomial
Link Function Log
Dependent Variable Y
Observations Used 150
 
Parameter Information
Parameter Effect
Prm1 Intercept
 
Criteria For Assessing Goodness Of Fit
Criterion DF Value Value/DF
Deviance 149 152.2965 1.0221
Scaled Deviance 149 152.2965 1.0221
Pearson Chi-Square 149 139.4156 0.9357
Scaled Pearson X2 149 139.4156 0.9357
Log Likelihood   -128.0874  
 
Algorithm converged.
 
Analysis Of Parameter Estimates
Parameter DF Estimate Standard Error Likelihood Ratio 95%
Confidence Limits
Chi-Square Pr > ChiSq
Intercept 1 0.1369 0.1110 -0.0823 0.3575 1.52 0.2176
Dispersion 1 0.9760 0.2628 0.5446 1.5989    

NOTE: The negative binomial dispersion parameter was estimated by maximum likelihood.

 

Contrast Estimate Results
Label Estimate Standard Error Alpha Confidence Limits Chi-Square Pr > ChiSq
Population Mean 0.1369 0.1110 0.05 -0.0807 0.3544 1.52 0.2176
Exp(Population Mean) 1.1467 0.1273 0.05 0.9225 1.4253    

 


 
Discrete Data Models
Mites on Apple Leaves
Negative Binomial Model
Expected Probabilities

Obs Mu k kinv VarY Y Prob Expected Cummulative InvCum
1 1.14667 0.97600 1.02459 2.42995 0 0.46325 69.4880 0.46325 1.00000
2 1.14667 0.97600 1.02459 2.42995 1 0.25067 37.5999 0.71392 0.53675
3 1.14667 0.97600 1.02459 2.42995 2 0.13401 20.1011 0.84793 0.28608
4 1.14667 0.97600 1.02459 2.42995 3 0.07135 10.7026 0.91928 0.15207
5 1.14667 0.97600 1.02459 2.42995 4 0.03791 5.6869 0.95719 0.08072
6 1.14667 0.97600 1.02459 2.42995 5 0.02012 3.0181 0.97731 0.04281
7 1.14667 0.97600 1.02459 2.42995 6 0.01067 1.6004 0.98798 0.02269
8 1.14667 0.97600 1.02459 2.42995 7 0.00565 0.8482 0.99363 0.01202
9 1.14667 0.97600 1.02459 2.42995 8 0.00300 0.4493 0.99663 0.00637
10 1.14667 0.97600 1.02459 2.42995 9 0.00159 0.2379 0.99822 0.00337
11 1.14667 0.97600 1.02459 2.42995 10 0.00084 0.1260 0.99906 0.00178

 


 
Discrete Data Models
Mites on Apple Leaves
Negative Binomial Model
Pearson Chi-square Goodness-of-fit Test
Note: Degrees of Freedom Should Be 3

The FREQ Procedure

Y Frequency Percent Test
Percent
0 70 46.67 46.33
1 38 25.33 25.07
2 17 11.33 12.40
3 10 6.67 7.14
4 9 6.00 3.79
5+ 6 4.00 4.28
 
Chi-Square Test
for Specified Proportions
Chi-Square 2.1504
DF 5
Pr > ChiSq 0.8280

Sample Size = 150