【发布时间】:2022-05-16 05:29:42
【问题描述】:
我正在尝试使用扫帚来总结 19 个多项式回归模型的结果。我已经关注了这个SO Question 并尝试将它与broom::tidy 一起使用。我的脚本如下:
ALTER PROCEDURE [dbo].[spRegressionPeak]
@StudyID int
AS
BEGIN
Declare @sStudyID VARCHAR(50)
Set @sStudyID = CONVERT(VARCHAR(50),@StudyID)
--We are selecting the distinct StudyID, Productnumber, ResponseID and mean
values 1 thorugh 6 from the CodeMeans table.
--Note that spCodeMeans must be run before running this stored procedure to
ensure response data exists in the CodeMeans table.
--We use IsNull values to pass zeroes where an average wasn't calculated os that
the polynomial regression can be calculated.
DECLARE @inquery AS NVARCHAR(MAX) = '
Select
c.StudyID, c.RespID, c.LikingOrder, avg(isnull(C1,0)) as C1, avg(isnull(C2,0)) as C2, avg(isnull(C3,0)) as C3, avg(isnull(C4,0)) as C4,
avg(isnull(C5,0)) as C5, avg(isnull(C6,0)) as C6, avg(isnull(C7,0)) as C7, avg(isnull(C8,0)) as C8, avg(isnull(C9,0)) as C9,
avg(isnull(C10,0)) as C10, avg(isnull(C11,0)) as C11, avg(isnull(C12,0)) as C12, avg(isnull(C13,0)) as C13, avg(isnull(C14,0)) as C14,
avg(isnull(C15,0)) as C15, avg(isnull(C16,0)) as C16, avg(isnull(C17,0)) as C17, avg(isnull(C18,0)) as C18, avg(isnull(C19,0)) as C19
from ClosedStudyResponses c
where c.StudyID = @StudyID
group by StudyID, RespID, LikingOrder
order by RespID
'
--We are setting @inquery aka InputDataSet to be our initial dataset.
--R Services requires that a data.frame be passed to any calculations being
generated. As such, df is simply data framing the @inquery data.
--The res object holds the polynomial regression results by RespondentID and
LikingOrder for each of the averages in the @inquery resultset.
EXEC sp_execute_external_script @language = N'R'
, @script = N'
library(tidyr, broom)
studymeans <- InputDataSet
df <- data.frame(studymeans)
lin.mod.1 <- lm(df$LikingOrder ~ poly(df$C1,3, raw=TRUE))
lin.mod.2 <- lm(df$LikingOrder ~ poly(df$C2,3, raw=TRUE))
lin.mod.3 <- lm(df$LikingOrder ~ poly(df$C3,3, raw=TRUE))
lin.mod.4 <- lm(df$LikingOrder ~ poly(df$C4,3, raw=TRUE))
lin.mod.5 <- lm(df$LikingOrder ~ poly(df$C5,3, raw=TRUE))
lin.mod.6 <- lm(df$LikingOrder ~ poly(df$C6,3, raw=TRUE))
lin.mod.7 <- lm(df$LikingOrder ~ poly(df$C7,3, raw=TRUE))
lin.mod.8 <- lm(df$LikingOrder ~ poly(df$C8,3, raw=TRUE))
lin.mod.9 <- lm(df$LikingOrder ~ poly(df$C9,3, raw=TRUE))
lin.mod.10 <- lm(df$LikingOrder ~ poly(df$C10,3, raw=TRUE))
lin.mod.11 <- lm(df$LikingOrder ~ poly(df$C11,3, raw=TRUE))
lin.mod.12 <- lm(df$LikingOrder ~ poly(df$C12,3, raw=TRUE))
lin.mod.13 <- lm(df$LikingOrder ~ poly(df$C13,3, raw=TRUE))
lin.mod.14 <- lm(df$LikingOrder ~ poly(df$C14,3, raw=TRUE))
lin.mod.15 <- lm(df$LikingOrder ~ poly(df$C15,3, raw=TRUE))
lin.mod.16 <- lm(df$LikingOrder ~ poly(df$C16,3, raw=TRUE))
lin.mod.17 <- lm(df$LikingOrder ~ poly(df$C17,3, raw=TRUE))
lin.mod.18 <- lm(df$LikingOrder ~ poly(df$C18,3, raw=TRUE))
lin.mod.19 <- lm(df$LikingOrder ~ poly(df$C19,3, raw=TRUE))
lst <- lapply(ls(pattern="lin.mod"), get)
allmodels <- lapply(lst, summary)
res <- broom::tidy(allmodels)
'
, @input_data_1 = @inquery
, @output_data_1_name = N'res'
, @params = N'@StudyID int'
,@StudyID = @StudyID
--- Edit this line to handle the output data frame.
--WITH RESULT SETS ((StudyID int, RespID int, LikingOrder int, NewColumn int,
res varchar(max)));
END;
当将有效的 StudyID 输入参数传递给上面的脚本时,上面的脚本会引发以下错误:
Error in setNames(data.frame(data), value.name) :
'names' attribute [1] must be the same length as the vector [0]
Calls: source ... <Anonymous> -> <Anonymous> -> melt.default -> setNames
In addition: There were 50 or more warnings (use warnings() to see the first
50)
我的输入数据如下: 期望的结果是获得 data.frame 中所有 19 个模型的摘要。如何解决错误并修改代码以完成最终结果?
【问题讨论】:
-
这是很多/不可能复制的。你能把它归结为可重现的东西吗?
-
请发布您需要对其执行
tidy操作的代表性数据样本的dput。看起来应该是df -
@camille 我刚刚更新了问题以包含要分析的源数据的屏幕截图。感谢您的帮助!
-
试图从数据的屏幕截图中工作意味着我们必须输入该图片中的所有内容。 See here 发布了一个人们可以回答的 R 问题—
dput有助于实现这一目标。