Is p = 0.5?
N <- 100; n <- 60; p <- 0.5
muestras <- rbinom(10000, N, p)
hist(muestras, breaks = 30, col = "gray", main = "", xlab = "")
abline(v = n, col = "red")
mean(muestras >= n)
## [1] 0.0269
The p-value is the proportion of samples that are at least as large as (in this case) the observed value.
Conceptually, it is \(P(D|H_0)\), the probability of your data given the null hypothesis.
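As a quick cross-check (an addition to the original), the same tail probability can be computed exactly from the binomial distribution; it should agree with the p-value that binom.test reports below.
# exact P(X >= n) under Binomial(N, p)
pbinom(n - 1, N, p, lower.tail = FALSE)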
binom.test(n, N, p, "greater")
##
## Exact binomial test
##
## data: n and N
## number of successes = 60, number of trials = 100, p-value =
## 0.02844
## alternative hypothesis: true probability of success is greater than 0.5
## 95 percent confidence interval:
## 0.5129758 1.0000000
## sample estimates:
## probability of success
## 0.6
library(rstan)
stanmodelcode <- '
data {
  int<lower=1> N;
  int<lower=0, upper=N> n;
}
parameters {
  real<lower=0, upper=1> p;
}
model {
  // non-informative prior
  p ~ beta(1, 1);
  // likelihood
  n ~ binomial(N, p);
}
'
fit <- stan(model_code = stanmodelcode,
data = list(N = 100, n = 60),
iter=12000, warmup=2000,
chains=4, thin=10)
res <- as.data.frame(fit)
hist(res$p, breaks = 30, col = "gray", main = "", xlab = "")
abline(v = 0.5, col = "red")
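Since the Beta(1, 1) prior is conjugate, the posterior is known in closed form, Beta(1 + 60, 1 + 40); redrawing the histogram on the density scale makes it easy to check the Stan samples against it (this overlay is an addition).
hist(res$p, breaks = 30, col = "gray", freq = FALSE, main = "", xlab = "")
curve(dbeta(x, 1 + 60, 1 + 40), col = "blue", add = TRUE)  # analytic posterior Beta(61, 41)
abline(v = 0.5, col = "red")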
modelo <- lm(dist ~ speed, data = cars)
plot(cars$speed, cars$dist, xlab = "speed", ylab = "distance", main = "")
abline(modelo, col = "red")
summary(modelo)
##
## Call:
## lm(formula = dist ~ speed, data = cars)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -29.069  -9.525  -2.272   9.215  43.201
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.5791     6.7584  -2.601   0.0123 *
## speed         3.9324     0.4155   9.464 1.49e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.38 on 48 degrees of freedom
## Multiple R-squared: 0.6511, Adjusted R-squared: 0.6438
## F-statistic: 89.57 on 1 and 48 DF, p-value: 1.49e-12
stanmodelcode <- '
data {
  int<lower=1> N;
  vector[N] speed;
  vector[N] dist;
}
parameters {
  real a0;                           // intercept
  real a1;                           // slope
  real<lower=0, upper=100> sigma;    // residual sd
}
model {
  // priors
  a0 ~ cauchy(0, 100);
  a1 ~ cauchy(0, 100);
  sigma ~ cauchy(0, 5);
  // likelihood
  for (i in 1:N)
    dist[i] ~ normal(a0 + a1 * speed[i], sigma);
}
'
fit <- stan(model_code = stanmodelcode,
data = list(N = nrow(cars), speed = cars$speed, dist = cars$dist),
iter=12000, warmup=2000,
chains=4, thin=10)
tmp <- as.data.frame(fit)$a1
hist(tmp, breaks = 30, col = "gray", xlab = "", freq = FALSE,
main = "coef speed: posteriori (hist) vs lm (red)")
coefs <- summary(modelo)$coefficients[2,]
curve(dnorm(x, coefs[1], coefs[2]), from = min(tmp), to = max(tmp), col = "red", add = TRUE)
library(ggplot2)
tmp <- as.data.frame(fit)[, c("a0", "a1")]
ggplot(cars, aes(x = speed, y = dist)) + geom_point() +
geom_abline(data = tmp, aes(slope = a1, intercept = a0), alpha = 0.01) +
geom_abline(intercept = coef(modelo)[1], slope = coef(modelo)[2], col = "red")
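As a rough numerical comparison (an addition), the posterior quantiles of a0 and a1 can be set against the classical confidence intervals:
t(apply(tmp, 2, quantile, probs = c(0.025, 0.5, 0.975)))  # posterior medians and 95% intervals
confint(modelo)                                           # frequentist 95% confidence intervals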
library(rstanarm)
post <- stan_lm(dist ~ speed, data = cars,
prior = R2(location = 0.5, what = "median"),
chains = 4, cores = 4, seed = 1234)
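The fitted object can be inspected with the usual rstanarm helpers; for instance, posterior_interval gives credible intervals comparable to the ones above (this call is an addition).
posterior_interval(post, prob = 0.95)  # credible intervals for the stan_lm coefficients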
rstanarm provides Bayesian counterparts of lm, glm, lmer, etc.
plot(Nile)
fit <- StructTS(Nile, type = "level")
lines(fitted(fit), lty = 2)
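For later comparison with the Stan fit below, the maximum likelihood estimates of the two standard deviations can be read off the StructTS object (this check is an addition; sigma0 and sigma1 are defined in the Stan code further down):
sqrt(fit$coef)  # ML estimates: sd of the level innovations ("level") and of the observation noise ("epsilon")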
We have a real underlying process,
\(m_{t+1} = m_t + \epsilon_t\), where \(\epsilon_t \sim N(0, \sigma_0)\),
and observations (the data) generated from that underlying process via
\(x_t = m_t + \eta_t\), where \(\eta_t \sim N(0, \sigma_1)\).
codigo <- '
data {
  int<lower=1> N;
  vector[N] nilo;
}
parameters {
  vector[N] m;                         // latent level
  real<lower=0, upper=1000> sigma0;    // sd of the level innovations
  real<lower=0, upper=1000> sigma1;    // sd of the observation noise
}
model {
  // diffuse prior on the initial level only
  m[1] ~ normal(1000, 500);
  nilo[1] ~ normal(m[1], sigma1);
  // random walk for the level plus observation noise
  for (i in 2:N) {
    m[i] ~ normal(m[i-1], sigma0);
    nilo[i] ~ normal(m[i], sigma1);
  }
}
'
fit <- stan(model_code = codigo,
data = list(N = length(Nile), nilo = as.vector(Nile)),
iter=12000, warmup=2000,
chains=4, thin=10)
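As a closing check (an addition, reusing the objects created above), the posterior mean of the latent level m can be overlaid on the series and the posteriors of sigma0 and sigma1 compared with the StructTS standard deviations computed earlier.
res <- as.data.frame(fit)
# posterior mean of the latent level; columns are named m[1], ..., m[N]
m_hat <- colMeans(res[, paste0("m[", seq_along(Nile), "]")])
plot(Nile)
lines(ts(m_hat, start = start(Nile)), lty = 2, col = "red")
# posterior summaries of the two standard deviations
quantile(res$sigma0, c(0.025, 0.5, 0.975))
quantile(res$sigma1, c(0.025, 0.5, 0.975))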