%macro median(intsize=, intvals=, mdn=, q1=, q3=, samestep=0);
/*----------------------------------------------------------------
  Generates inline DATA step code that estimates the median and/or
  the 1st and 3rd quartiles from a distribution (frequency) table,
  interpolating linearly within the interval that contains the
  target point.

  Required parameters:
    intsize  = name of an array holding the width of every interval,
               in order. The lowest interval is assumed to start at 0
               (if it does not, add the offset to the result). The
               calling step typically sets this array up with retained
               variables and drops them itself.
    intvals  = name of the corresponding array holding the frequency
               (count) of each interval, in the same order.
               Both arrays must have the same number of elements.

  Output parameters (specify one or more, otherwise the generated
  code computes nothing useful):
    mdn      = name of the variable that receives the estimated median.
    q1       = name of the variable that receives the 1st quartile.
    q3       = name of the variable that receives the 3rd quartile.
               Only the statistics that are named are calculated.
               A requested statistic is left missing when the total
               count is not greater than 0.

  Optional parameter:
    samestep = 0 (default) generates a drop statement for the work
               variables. Specify samestep=1 on the 2nd and later
               invocations within one DATA step; the original authors
               note that a repeated drop statement makes the step fail.

  Work variables created in the calling step:
    _haf _top _slot _slot2 _quit _nints _q1 _q3 _i_

  See $6386.public.applctns(agg901) for a good sample invocation.

  A trivial example:
    array w w1-w3;
    retain w1 5 w2 10 w3 10;  *-intervals are 0-5, 5-15 and 15-25-*;
    drop w1-w3;
    array c c1-c3;  *--defined on input. c1 is the count for
                       interval 1, c2 for interval 2, c3 for
                       interval 3--;
    %median(intsize=w,intvals=c,mdn=medtablc,q1=q1tablc,q3=q3tablc)
  The macro determines the median and the 1st and 3rd quartiles of
  the distribution represented by the c array and assigns them to
  the variables medtablc, q1tablc and q3tablc.

  Notes and support by john blodgett (jgb) and jim struthers (jws).
  Converted to lowercase and removed use of "do over" with implicit
  arrays. jgb, 7/2001.
  _nints is now assigned from dim() instead of being accumulated
  with a sum statement (see comment in the code).
----------------------------------------------------------------*/

  /* Start every requested statistic at missing. Missing also serves
     as the "not found yet" marker inside the scan loop below. */
  %if &mdn ne %str() %then %str(&mdn=.;);
  %if &q1 ne %str() %then %str(&q1=.;);
  %if &q3 ne %str() %then %str(&q3=.;);

  /* Number of intervals. This must be a plain assignment: a sum
     statement (_nints+1) is retained by SAS, so the value kept growing
     on every observation and every extra invocation in the step, and
     the scan loop below could then index past the end of the arrays
     whenever _quit was never set. */
  _nints = dim(&intvals);

  /* Total count for the table. The sum statement skips missing counts. */
  _haf = 0;
  do _i_ = 1 to _nints;
    _haf + &intvals{_i_};
  end;

  if _haf > 0 then do;
    /* Target cumulative counts. _q1 and _q3 must be derived before
       _haf is overwritten with half of the total. */
    %if &q1 ne %str() %then %str(_q1 = _haf*.25;);
    %if &q3 ne %str() %then %str(_q3 = _haf*.75;);
    %if &mdn ne %str() %then %str(_haf = _haf*.5;);

    _top  = 0;   /* cumulative count through the current interval      */
    _slot = 0;   /* cumulative width, i.e. upper bound of the interval */

    /* Add intervals and their counts until the last requested point
       has been located (_quit=1) or the intervals are exhausted. */
    _quit = 0;
    _i_ = 1;
    do while (_i_ <= _nints and _quit = 0);
      _top  = _top  + &intvals{_i_};
      _slot = _slot + &intsize{_i_};

      %if &q1 ne %str() %then %do;
        /*--determine lower quartile--*/
        if _q1 <= _top and &q1 = . then do;
          if _q1 = _top then do;
            /* Target falls exactly on an interval boundary: extend
               over any following empty intervals and take the
               midpoint of that gap. */
            _slot2 = _slot;
            _i_ = _i_ + 1;
            do while (&intvals{_i_} = 0);
              _slot = _slot + &intsize{_i_};
              _i_ = _i_ + 1;
            end;
            _i_ = _i_ - 1;  /* step back; the outer loop advances again */
            &q1 = (_slot2 + _slot)/2;
          end;
          else do;
            /* Subtract the share of this interval by which the
               cumulative count exceeds the target. The zero-count
               test avoids a division by zero. */
            if &intvals{_i_} = 0 then &q1 = _slot - &intsize{_i_};
            else &q1 = _slot - (((_top - _q1)/&intvals{_i_})*&intsize{_i_});
          end;
          /* q1 is the last point wanted only when neither the median
             nor q3 was requested. */
          %if &q3 eq and &mdn eq %then %str(_quit = 1;);
        end;
      %end;

      %if &mdn ne %str() %then %do;
        /*--determine median--*/
        if _haf <= _top and &mdn = . then do;
          if _haf = _top then do;
            /* Exact boundary hit: midpoint of the gap up to the next
               interval with a nonzero count. */
            _slot2 = _slot;
            _i_ = _i_ + 1;
            do while (&intvals{_i_} = 0);
              _slot = _slot + &intsize{_i_};
              _i_ = _i_ + 1;
            end;
            _i_ = _i_ - 1;
            &mdn = (_slot2 + _slot)/2;
          end;
          else do;
            /* Subtract the amount by which the current interval
               exceeds half of the total. */
            if &intvals{_i_} = 0 then &mdn = _slot - &intsize{_i_};
            else &mdn = _slot - (((_top - _haf)/&intvals{_i_})*&intsize{_i_});
          end;
          /* The median is the last point wanted when q3 was not requested. */
          %if &q3 eq %then %str(_quit = 1;);
        end;
      %end;

      %if &q3 ne %str() %then %do;
        /*--determine upper quartile--*/
        if _q3 <= _top and &q3 = . then do;
          if _q3 = _top then do;
            /* Exact boundary hit: midpoint of the gap up to the next
               interval with a nonzero count. */
            _slot2 = _slot;
            _i_ = _i_ + 1;
            do while (&intvals{_i_} = 0);
              _slot = _slot + &intsize{_i_};
              _i_ = _i_ + 1;
            end;
            _i_ = _i_ - 1;
            &q3 = (_slot2 + _slot)/2;
          end;
          else do;
            /* Subtract the amount by which the current interval
               exceeds three fourths of the total. */
            if &intvals{_i_} = 0 then &q3 = _slot - &intsize{_i_};
            else &q3 = _slot - (((_top - _q3)/&intvals{_i_})*&intsize{_i_});
          end;
          /* q3 is always the last point, so stop scanning. */
          _quit = 1;
        end;
      %end;

      _i_ = _i_ + 1;
    end;  /*--- of "do while" ---*/
  end;    /*--- of "if-then" ---*/

  /* Keep the work variables out of the output data set. Generated only
     once per step (samestep=0).
     NOTE(review): _i_ is not in this list; presumably callers drop it
     themselves - confirm before adding it here. */
  %if &samestep eq 0 %then %do;
    drop _haf _top _slot _slot2 _quit _nints
      %if &q1 ne %str() %then %str( _q1 );
      %if &q3 ne %str() %then %str( _q3 );
    %str(;)
  %end;
%mend median;