-
Notifications
You must be signed in to change notification settings - Fork 96
/
combos.sas
224 lines (185 loc) · 7.11 KB
/
combos.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
/*-----------------------------------------------------------------*
| Name: combos.sas |
| Title: Create some/all combinations of N things, K at a time |
Doc: http://www.datavis.ca/sasmac/combos.html
| ----------------------------------------------------------------|
| Procs: summary sort print |
| Macdefs: combos |
| Datasets: _temp_ |
| ----------------------------------------------------------------|
| Author: Michael Friendly <[email protected]> |
| Created: 17 Jun 1999 10:47:42 |
| Revised: 10 Feb 2006 09:11:26 |
* Version: 1.0 *
*-----------------------------------------------------------------*/
/*=
=Description:
The COMBOS macro generates symbolic combinations of N things taken K
at a time. The combinations may be specified to include any given
subset. The resulting combinations are returned as an output data set,
and in global macro variables.
==Method:
The macro uses the simple trick of using PROC SUMMARY to give all
possible combinations, using the THINGS as class variables.
=Usage:
The THINGS and SIZE parameters are positional, and required. The rest are
optional keyword parameters, which may be specified in any order.
==Parameters:
* THINGS The N things to combine: a list of blank-separated
words, or a variable range such as X1-X10. These
things become variables in the output data set.
They need not exist before the macro is called,
but they must be valid SAS names. The length of
the longest combination (including SEP characters)
must not exceed 200.
* SIZE Size (K) of each combination, a numeric value.
* INCLUDE= Items which must be included. If a number, then that
number of the first items in THINGS is included in
each combination.
* OUT=out Name of the output data set containing the combinations,
one observation for each.
* SEP= Separator string used within each combination. With
THINGS=A B C D, the default SEP gives combinations
like 'A B C', 'A B D', etc. [Default: SEP=%str( )]
* JOIN= Separator to join all combinations in the RESULT=
macro variable. With THINGS=A B C D, the default JOIN
gives RESULT='A B C, A B D'. [Default: JOIN=%str(, )]
* SORT= Specifies whether the combinations are sorted in the
output data set. [Default: SORT=Y]
* PRINT= Whether to print the OUT= data set. [Default: PRINT=Y]
* RESULT=combos Name of a global macro result variable containing all
combinations. You may not use RESULT=RESULT (or any
other local macro variable).
* NCOMB=nc Name of macro result variable with the number of
combinations generated. If INCLUDE= (empty), or
INCLUDE=0, this is C(N,K).
=Examples:
%combos(a b c d e, 2, include=2);
produces one combination of size 2 which includes A and B:
OBS A B C D E COMBO SIZE
1 1 1 . . . A B 2
%combos(a b c d, 3, sep=-);
produces all 4 combinations of 4 things, 3 at a time:
OBS A B C D COMBO SIZE
1 1 1 1 . A-B-C 3
2 1 1 . 1 A-B-D 3
3 1 . 1 1 A-C-D 3
4 . 1 1 1 B-C-D 3
=*/
%macro combos(
    things,           /* the N things to combine                        */
    size,             /* size (K) of each combination                   */
    include=,         /* items which must be included                   */
    out=out,          /* output data set containing combos              */
    sep=%str( ),      /* separator within each combo                    */
    join=%str(, ),    /* separator to join all combos                   */
    sort=Y,           /* specifies whether the combinations are sorted  */
    print=Y,          /* whether to print the OUT= data set             */
    result=combos,    /* name of macro result variable with all
                         combinations                                   */
    ncomb=nc          /* name of macro result variable with C(N,K)      */
    );

    /*
     * Generates every combination of SIZE items drawn from THINGS and
     * writes one observation per combination to &OUT.  The joined list
     * of combinations is returned in the global macro variable named by
     * RESULT=, and their count in the one named by NCOMB=.
     */
    %global &result &ncomb;

    %* Keep loop/scratch macro variables out of the caller's scope;
    %local i _ninc_ _notes_;

    %let sort  = %substr(%upcase(&sort),1,1);
    %let print = %substr(%upcase(&print),1,1);

    %* Remember the caller's NOTES setting so it can be restored on exit;
    %let _notes_ = %sysfunc(getoption(notes));
    options nonotes;

    /*
     * Construct a one-observation data set which contains just
     * a=1; b=1; c=1; etc. for each word in THINGS.
     */
    data _temp_;
        array xxx &things;
        do over xxx;
            xxx = 1;
        end;

    %if %index(&things,-) > 0 %then %do;
        data _null_;
            set _temp_;
            %* convert shorthand variable list to long form;
            length _vname_ $ 32 _vlist_ $ 32767;
            array _xx_ &things;
            do over _xx_;
                call vname(_xx_, _vname_);
                _vlist_ = trim(_vlist_) || ' ' || trim(_vname_);
            end;
            put "NOTE: The THINGS=&THINGS list translates to: THINGS=" _vlist_;
            call symput( 'things', trim(_vlist_) );
        run;
    %end;

    /*
     * Use PROC SUMMARY to find all combinations: with the THINGS as
     * CLASS variables, each _TYPE_ level is one subset of THINGS.
     */
    proc summary data=_temp_;
        class &things;
        output out=_out_;

    /*
     * If &include is a number, replace it with the first few things.
     */
    %if %length(&include) %then %do;
        %if %verify(&include,0123456789)=0 %then %do;
            %if &include=0 %then %do;
                %let include=;
            %end;
            %else %do;
                %let _ninc_ = &include;
                %let include=;
                %do i=1 %to &_ninc_;
                    %let include = &include %scan(&things,&i);
                %end;
                %put NOTE: Translating INCLUDE=&_ninc_ to INCLUDE=&include;
            %end;
        %end;
    %end;

    /*
     * Find those of the right size, combine words into COMBO.
     */
    data &out;
        set _out_;
        drop _freq_ _type_ _vname_ _k_ _ok_;
        length _vname_ $32 combo $200;

        size = n(of &things);
        if size ^= &size then return;

        _ok_ = 1;
        array _xx_ &things;
        %if %length(&include) %then %do;
            array _in_ &include;
        %end;

        /*
         * _k_ is incremented by a sum statement below and is therefore
         * retained across observations; it must be reset for every
         * observation so that the first word of each combination is
         * not preceded by the separator.
         */
        _k_ = 0;
        do over _xx_;
            call vname(_xx_, _vname_);
            if _xx_ ^= . then do;
                _k_+1;
                if _k_=1
                    then combo = trim(_vname_);
                    else combo = trim(combo) || "&sep" || trim(_vname_);
            end;
        end;

        %if %length(&include) %then %do;
            %* reject combinations lacking any required item;
            do over _in_;
                if _in_ = . then _ok_=0;
            end;
        %end;
        if _ok_ then output;

    %if &sort=Y %then %do;
        proc sort data=&out;
            by combo;
    %end;

    %if &print=Y %then %do;
        proc print;
    %end;

    /*
     * Return the results in macro variables.  They are preset to the
     * "no combinations" values because the DATA step below stops at the
     * SET statement, without executing anything after it, when &out has
     * no observations.
     */
    %let &ncomb = 0;
    %let &result = ;

    data _null_;
        length result $32767;
        retain result;
        set &out end=eof;
        if _n_ = 1
            then result = trim(combo);
            else result = trim(result) || "&join" || trim(combo);
        if eof then do;
            %* BEST12. rather than 3. so counts of 1000 or more survive;
            call symput("&ncomb", trim(left(put(_n_, best12.))));
            call symput("&result", trim(result));
        end;
    run;

    %if &&&ncomb = 0 %then %do;
        %put WARNING: No combinations were created from &things, size &size, including &include;
    %end;

%done:
    options &_notes_;
%mend;