*hand29.txt;

/*-------------------------------------------------------------------
  Reads the 1988 air-quality readings from an external text file and
  draws one box-and-whisker plot of CO per station with PROC GPLOT.

  Steps:
    1. Read the raw file into CA88AIR and print it.
    2. Sort CO within STATION and compute median, Q1, Q3 and the
       interquartile range per station (PROC UNIVARIATE).
    3. Find the whisker endpoints per station (STATS2).
    4. Build a plotting data set (BOTH) whose PLTVAR value selects
       the symbol definition used for each piece of the picture:
         1 = whiskers, 2 = box, 3 = outliers, 4 = extreme outliers.
    5. Plot YVAR*XVAR=PLTVAR.
  -------------------------------------------------------------------*/

/* Read the raw data.  The values come from the INFILE file, so no
   CARDS statement is needed: the original had a stray CARDS with no
   data lines, which is at best redundant and can swallow the
   statements that follow it. */
data ca88air;
   infile 'a:\CA88AIR.TXT';
   input O3 CO NO3 SO4 TEMP HUMID DATE STATION $ MONTH;
   format date date7.;
run;

/* Print the data set just created (named explicitly instead of
   relying on the most recently created data set). */
proc print data=ca88air;
run;

options nodate nonumber;

/* RESET needs a value, and it must come before BORDER so that the
   border request is not itself reset.
   NOTE(review): RESET=ALL also cancels global TITLE, FOOTNOTE, AXIS
   and SYMBOL statements.  Everything this program uses is (re)defined
   below, but confirm nothing defined earlier in the session is
   expected to survive. */
goptions reset=all border;

* Create box-whisker plots.
***********************************************************************************;
title1;
title2;
title3;
footnote;

* Order the CO readings within each station (ascending);
proc sort data=ca88air (keep=co station) out=caair;
   by station co;
run;

* determine the median, and quartile statistics for each station;
proc univariate data=caair noprint;
   by station;
   var co;
   output out=stats median=median q1=q1 q3=q3 qrange=qrange;
run;

* Determine the whisker endpoints;
* Whisker endpoints are the most extreme data values that are
* within 1.5*qrange of the quartiles.;
data stats2;
   merge stats caair;
   by station;
   retain lowpt highpt;
   drop co;

   * Start each station with no endpoints found yet;
   if first.station then do;
      lowpt  = .;
      highpt = .;
   end;

   * does this point determine the whisker end point?;
   if q1-1.5*qrange <= co <= q3+1.5*qrange then do;
      * CAAIR is sorted by CO within station, so the first in-range
      * value seen is the smallest one (the low whisker end);
      if lowpt = . then lowpt = co;

      * keep the largest in-range value as the high whisker end;
      if highpt = . then highpt = co;
      else highpt = max(highpt, co);
   end;

   * One summary row per station;
   if last.station then output;
run;

* combine the stats with the data and retain extreme points;
data both;
   merge stats2 caair;
   by station;

   if first.station then do;
      * Build the box and whiskers from the summary stats;
      * STACNT numbers the stations 1, 2, 3, ... in BY order and is
      * used as the horizontal position of the box;
      stacnt + 1;

      * Whiskers are dotted lines;
      pltvar = 1;
      * start at the top whisker;
      xvar = stacnt;  yvar = highpt;  output;
                      yvar = q3;      output;
      * a missing XVAR breaks the joined line (SKIPMISS on the PLOT);
      xvar = .;                       output;
      xvar = stacnt;  yvar = lowpt;   output;
                      yvar = q1;      output;
      xvar = .;                       output;

      * The box is a solid line;
      * It is traced as one continuous path that also draws the
      * median bar across the middle;
      pltvar = 2;
      xvar = stacnt+.3;  yvar = q3;      output;
                         yvar = median;  output;
      xvar = stacnt-.3;                  output;
      xvar = stacnt+.3;                  output;
                         yvar = q1;      output;
      xvar = stacnt-.3;                  output;
                         yvar = q3;      output;
      xvar = stacnt+.3;                  output;
      xvar = .;                          output;
   end;

   * plot outliers;
   xvar = stacnt;
   yvar = co;

   * Determine where this point falls;
   * A missing CO compares lower than every number, so without this
   * guard it would be written out as an extreme outlier;
   if co ne . then do;
      * Extreme outliers;
      if co < q1-3*qrange or co > q3+3*qrange then do;
         pltvar = 4;
         output;
      end;
      else if co < q1-1.5*qrange or co > q3+1.5*qrange then do;
         pltvar = 3;
         output;
      end;
   end;
run;

* Control the vertical axis;
axis1 label=(h=1.5 f=simplex a=90 'p.p.m.')
      value=(h=1.5 f=simplex);

* Control the horizontal axis;
/* Tick marks 0..4: the outer two are left blank and the middle three
   carry the station names.
   NOTE(review): the labels are hard-coded and assume exactly three
   stations whose BY order is AZU, LIV, SFO - confirm against the
   data. */
axis2 label=(h=1.5 f=simplex 'STATIONS')
      order=(0 to 4 by 1)
      major=none
      minor=none
      value=(h=1.5 f=simplex t=1 ' ' t=2 'AZU' t=3 'LIV' t=4 'SFO' t=5 ' ');

* Define the symbols;
* symbol1 = whiskers and symbol2 = box (joined lines, no markers);
symbol1 v=none c=black l=2 i=join;
symbol2 v=none c=black l=1 i=join;
*check the SYMBOL window;
* symbol3 = outliers and symbol4 = extreme outliers (markers only);
symbol3 v=circle c=black;
symbol4 v=diamond c=black;

* Plot the data;
proc gplot data=both;
   plot yvar*xvar=pltvar / nolegend
                           skipmiss
                           vaxis=axis1
                           haxis=axis2;
   title1 h=2 '1988 Carbon Monoxide Readings';
   footnote1 j=l h=2 f=simplex 'Figure 4.3.5';
run;
quit;