I'm using speedglm
to estimate a logistic regression model on some data. I've created a reproducible example which generates the same error that I get using my original data.
library(speedglm)
n <- 10000
dtf <- data.frame( y = sample(c(0,1), n, 1),
x1 = as.factor(sample(c("a","b"), n, 1)),
x2 = rnorm(n, 30, 10))
m <- speedglm(y ~ x1 + x2, dtf, family=binomial())
summary(m)
The output is the following:
Generalized Linear Model of class 'speedglm':
Call: speedglm(formula = y ~ x1 + x2, data = dtf, family = binomial())
Coefficients:
------------------------------------------------------------------
Error in data.frame(..., check.names = FALSE) :
arguments imply differing number of rows: 3, 0
I've checked the source code of summary.speedglm
by executing getS3method("summary", "speedglm")
and found the code line which generates the error, but it didn't help to solve the problem.
PS: someone with 1500+ rep should create the speedglm
tag.
Marco Enea, the maintainer of speedglm
, asked to post the following temporary fix for summary.speedglm
and print.summary.speedglm
.
summary.speedglm <- function (object, correlation = FALSE, ...)
{
if (!inherits(object, "speedglm"))
stop("object is not of class speedglm")
z <- object
var_res <- as.numeric(z$RSS/z$df)
dispersion <- if (z$family$family %in% c("poisson", "binomial")) 1 else var_res
if (z$method == "qr") {
z$XTX <- z$XTX[z$ok, z$ok]
}
inv <- solve(z$XTX, tol = z$tol.solve)
covmat <- diag(inv)
se_coef <- rep(NA, length(z$coefficients))
se_coef[z$ok] <- sqrt(dispersion * covmat)
if (z$family$family %in% c("binomial", "poisson")) {
z1 <- z$coefficients/se_coef
p <- 2 * pnorm(abs(z1), lower.tail = FALSE)
} else {
t1 <- z$coefficients/se_coef
p <- 2 * pt(abs(t1), df = z$df, lower.tail = FALSE)
}
ip <- !is.na(p)
p[ip] <- as.numeric(format(p[ip], digits = 3))
dn <- c("Estimate", "Std. Error")
if (z$family$family %in% c("binomial", "poisson")) {
format.coef <- if (any(na.omit(abs(z$coef)) < 1e-04))
format(z$coefficients, scientific = TRUE, digits = 4) else
round(z$coefficients, digits = 7)
format.se <- if (any(na.omit(se_coef) < 1e-04))
format(se_coef, scientific = TRUE, digits = 4) else round(se_coef, digits = 7)
format.pv <- if (any(na.omit(p) < 1e-04))
format(p, scientific = TRUE, digits = 4) else round(p, digits = 4)
param <- data.frame(format.coef, format.se, round(z1,
digits = 4), format.pv)
dimnames(param) <- list(names(z$coefficients), c(dn,
"z value", "Pr(>|z|)"))
} else {
format.coef <- if (any(abs(na.omit(z$coefficients)) <
1e-04))
format(z$coefficients, scientific = TRUE, digits = 4) else
round(z$coefficients, digits = 7)
format.se <- if (any(na.omit(se_coef) < 1e-04))
format(se_coef, scientific = TRUE, digits = 4) else
round(se_coef, digits = 7)
format.pv <- if (any(na.omit(p) < 1e-04))
format(p, scientific = TRUE, digits = 4) else round(p, digits = 4)
param <- data.frame(format.coef, format.se, round(t1,
digits = 4), format.pv)
dimnames(param) <- list(names(z$coefficients), c(dn,
"t value", "Pr(>|t|)"))
}
eps <- 10 * .Machine$double.eps
if (z$family$family == "binomial") {
if (any(z$mu > 1 - eps) || any(z$mu < eps))
warning("fitted probabilities numerically 0 or 1 occurred")
}
if (z$family$family == "poisson") {
if (any(z$mu < eps))
warning("fitted rates numerically 0 occurred")
}
keep <- match(c("call", "terms", "family", "deviance", "aic",
"df", "nulldev", "nulldf", "iter", "tol", "n", "convergence",
"ngoodobs", "logLik", "RSS", "rank"), names(object),
0)
ans <- c(object[keep], list(coefficients = param, dispersion = dispersion,
correlation = correlation, cov.unscaled = inv, cov.scaled = inv *
var_res))
if (correlation) {
ans$correl <- (inv * var_res)/outer(na.omit(se_coef),
na.omit(se_coef))
}
class(ans) <- "summary.speedglm"
return(ans)
}
print.summary.speedglm <- function (x, digits = max(3, getOption("digits") - 3), ...)
{
cat("Generalized Linear Model of class 'speedglm':\n")
if (!is.null(x$call))
cat("\nCall: ", deparse(x$call), "\n\n")
if (length(x$coef)) {
cat("Coefficients:\n")
cat(" ------------------------------------------------------------------",
"\n")
sig <- function(z){
if (!is.na(z)){
if (z < 0.001)
"***"
else if (z < 0.01)
"** "
else if (z < 0.05)
"* "
else if (z < 0.1)
". "
else " "
} else " "
}
options(warn=-1)
sig.1 <- sapply(as.numeric(as.character(x$coefficients[,4])),
sig)
options(warn=0)
est.1 <- cbind(format(x$coefficients, digits = digits),
sig.1)
colnames(est.1)[ncol(est.1)] <- ""
print(est.1)
cat("\n")
cat("-------------------------------------------------------------------",
"\n")
cat("Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1",
"\n")
cat("\n")
}
else cat("No coefficients\n")
cat("---\n")
cat("null df: ", x$nulldf, "; null deviance: ", round(x$nulldev,
digits = 2), ";\n", "residuals df: ", x$df, "; residuals deviance: ",
round(x$deviance, digits = 2), ";\n", "# obs.: ", x$n,
"; # non-zero weighted obs.: ", x$ngoodobs, ";\n", "AIC: ",
x$aic, "; log Likelihood: ", x$logLik, ";\n", "RSS: ",
round(x$RSS, digits = 1), "; dispersion: ", x$dispersion,
"; iterations: ", x$iter, ";\n", "rank: ", round(x$rank,
digits = 1), "; max tolerance: ", format(x$tol, scientific = TRUE,
digits = 3), "; convergence: ", x$convergence, ".\n",
sep = "")
invisible(x)
if (x$correlation) {
cat("---\n")
cat("Correlation of Coefficients:\n")
x$correl[upper.tri(x$correl, diag = TRUE)] <- NA
print(x$correl[-1, -nrow(x$correl)], na.print = "", digits = 2)
}
}
Following 42' suggestion, I would also add the following:
environment(summary.speedglm) <- environment(speedglm)
environment(print.summary.speedglm) <- environment(speedglm)
The print.summary.speedglm
function has a tiny bug in it. If you change this line:
sig.1 <- cbind(sapply(as.numeric(as.character(x$coefficients$"Pr(>|t|)")), sig))
To this line:
sig.1 <- cbind(sapply(as.numeric(as.character(x$coefficients$"Pr(>|z|)")), sig))
And also run:
environment(print.summary.speedglm) <- environment(speedglm)
You will not see the error message anymore.
The proper way to report bugs is to contact the maintainer (I'll send him an email):
maintainer('speedglm')
[1] "Marco Enea <[email protected]>"