以下是几种可选方法,既可以生成以表格形式显示分类数据频数统计的输出,也可以对数据本身进行转置(透视):
Proc TABULATE
-
Proc REPORT
Proc FREQ
-
Proc TRANSPOSE - 仅当您确实想把数据值(周)用作元数据(列名)时使用
-
SQL(写起来比较费力)
假设您的数据包含 visitId、date 和 ss 三列:
/* Simulate one year of visit records: every day of 2019 gets between 5 and */
/* 15 rows, each with a sequential visitId and a randomly chosen ss code.   */
/* Output columns: date, visitId, ss.                                       */
data visits;
  call streaminit(1234);  /* fixed seed so the simulated data is reproducible */
  do date = '01jan2019'd to '31dec2019'd;
    /* rand('uniform') draws from the unit interval, so 5 + floor(11 * u)  */
    /* is an integer from 5 to 15: the number of ss codes for this day     */
    do _n_ = 1 to 5 + floor(11 * rand('uniform'));
      visitId + 1;  /* sum statement: running counter retained across rows */
      length ss $5;
      /* ceil(8 * u) selects one of the 8 comma-separated codes */
      ss = scan("CS,FALL,ELBOW,ANKLE,LS,PS,SA,VV", ceil(8 * rand('uniform')));
      output;
    end;
  end;
  format date yymmdd10.;
run;
计算一个新变量,用来保存每次访问所在的周。该变量在聚合时充当分组依据(即“桶”)。
/* Add weekof to every visit row: the start of the 'week' interval that    */
/* contains date. Later steps aggregate on this value, one bucket per week. */
data have;
  set visits;
  /* an increment of 0 keeps the row in its own week interval */
  weekof = intnx('week', date, 0);
  format weekof mmddyy10.;
  label weekof = 'Week of';
run;
使用 SAS 过程(PROC)生成输出:
/* Count table with one row per ss code and one column per weekof value, */
/* restricted to weeks whose weekof date falls in November 2019.          */
title 'Tabulate - weeks are columns';
proc tabulate data=have(where=(year(weekof) = 2019 and month(weekof) = 11));
  class ss weekof;
  /* row dimension: ss with its label blanked;                      */
  /* column dimension: weekof crossed with N, N heading blanked     */
  table ss='', weekof * n='';
run;
/* Two count tables for weeks whose weekof date falls in Q4 2019, with one */
/* row per weekof value and one column per ss code.                         */
title 'Tabulate - weeks are rows';
proc tabulate data=have(where=(year(weekof) = 2019 and qtr(weekof) = 4));
  class ss weekof;
  /* table 1: weekof keeps its own label in the row heading; NOCELLMERGE set */
  table weekof, ss='' * n='' / nocellmerge;
  /* table 2: weekof label blanked, 'Week of' written into the box instead */
  table weekof='', ss='' * n='' / box='Week of';
run;
/* November 2019 cross-tab via PROC REPORT: ss values group the rows and */
/* each weekof value becomes its own column.                              */
title 'Report - weeks are columns';
proc report data=have(where=(year(weekof) = 2019 and month(weekof) = 11)) split='A0'x;
  /* NOTE(review): split='A0'x presumably moves the split character away  */
  /* from the default '/', so the mm/dd/yyyy column headers are not       */
  /* broken across lines - confirm                                        */
  column ss weekof;
  define ss     / group;
  define weekof / across;
run;
/* November 2019 cross-tab via PROC FREQ: ss down the side, weekof across. */
title 'Freq - weeks are columns';
proc freq data=have(where=(year(weekof) = 2019 and month(weekof) = 11));
  /* options turn off row, column, cumulative and overall percentage output */
  tables ss * weekof / norow nocol nocum nopercent;
run;
转置
先按 ss 和周统计频数,然后再进行转置
/* Count the visit rows for every (ss, weekof) combination.               */
/* Result: have_counts with columns ss, weekof, freq, ordered by ss and   */
/* weekof so the following PROC TRANSPOSE can process it BY ss.           */
proc sql;
  create table have_counts as
  select ss, weekof, count(*) as freq
  from have
  group by ss, weekof
  order by ss, weekof
  ;
quit;  /* end PROC SQL explicitly rather than relying on the next step boundary */
/* Pivot the November 2019 counts: one output row per ss, one column per */
/* weekof value, each cell holding that week's freq.                      */
proc transpose
    data=have_counts(where=(year(weekof) = 2019 and month(weekof) = 11))
    out=have_across_week(drop=_name_);
  by ss;      /* one transposed row per ss value */
  var freq;   /* the value that fills the new columns */
  id weekof;  /* weekof values name the new columns */
run;
SQL
用 SQL 实现透视的代码写起来既繁琐又容易出错,而且当数据中出现新的日期时,它不会自动扩展。这种大量彼此相似的语句(那些 SUM 表达式)被称为“壁纸代码”(wallpaper code)——可谁又喜欢贴壁纸呢?
/* Hand-written pivot: one row per ss, one hard-coded column per week.     */
/* Each comparison evaluates to 1 (true) or 0 (false), so SUM() counts the */
/* rows that fall in that week. Every new week needs another SUM line.     */
proc sql;
  create table ss_freq_across_weeks as
  select
    ss
  /* weekof already holds intnx('week', date, 0), computed when have was built */
  , sum ( weekof = '03-NOV-2019'D ) as week1 label = 'Week of 11/03/2019'
  , sum ( weekof = '10-NOV-2019'D ) as week2 label = 'Week of 11/10/2019'
  , sum ( weekof = '17-NOV-2019'D ) as week3 label = 'Week of 11/17/2019'
  /*...*/
  from have
  group by ss
  ;
quit;  /* without QUIT, PROC SQL keeps running until another step starts */