library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(poweRlaw)
# Poisson
set.seed(1)
poisson <- rpois(1000, lambda=3)
hist(poisson, main = "Poisson dist")

k = unique(poisson)
pk = c()
for(ki in k){
pk <- c(pk, sum(poisson %in% ki) )
}
plot(k, pk)

logk = log(k); logPk = log(pk)
dfP = data.frame(logk, logPk)[!is.infinite(logk),]
plm = summary(lm(logPk ~ logk, data = dfP))
pRs = plm$r.squared
plot(logk, logPk, main = paste0("log-log transformation, Poisson, R-squared ", round(pRs, 4)))

# Power Law
set.seed(1)
xmin = 1; alpha = 2
PL = rpldis(1000, 1, 3)
hist(PL, main = "Power Law dist")

k = unique(PL)
pk = c()
for(ki in k){
pk <- c(pk, sum(PL %in% ki) )
}
plot(k, pk)

logk = log(k); logPk = log(pk)
dfLP = data.frame(logk, logPk)[!is.infinite(logk),]
pLlm = summary(lm(logPk ~ logk), data = dfLP)
pPLs = pLlm$r.squared
plot(logk, logPk, main = paste0("log-log transformation, Power Law, R-squared ", round(pPLs, 4)))

# Compare
p_vec <- c()
pl_vec <- c()
for(i in 1:1000){
set.seed(i)
poisson <- rpois(1000, lambda=3)
PL = rpldis(1000, 1, 3)
# Poisson
k = unique(poisson)
pk = c()
for(ki in k){
pk <- c(pk, sum(poisson %in% ki) )
}
logk = log(k); logPk = log(pk)
dfP = data.frame(logk, logPk)[!is.infinite(logk),]
plm = summary(lm(logPk ~ logk, data = dfP))
pRs = plm$r.squared
p_vec <- c(p_vec, pRs)
# Power Law
k = unique(PL)
pk = c()
for(ki in k){
pk <- c(pk, sum(PL %in% ki) )
}
logk = log(k); logPk = log(pk)
dfLP = data.frame(logk, logPk)[!is.infinite(logk),]
pLlm = summary(lm(logPk ~ logk), data = dfLP)
pPLs = pLlm$r.squared
pl_vec <- c(pl_vec, pPLs)
}
p_vec <- c()
pl_vec <- c()
for(i in 1:1000){
set.seed(i)
poisson <- rpois(1000, lambda=3)
PL = rpldis(1000, 1, 3)
# Poisson
k = unique(poisson)
pk = c()
for(ki in k){
pk <- c(pk, sum(poisson %in% ki) )
}
logk = log(k); logPk = log(pk)
dfP = data.frame(logk, logPk)[!is.infinite(logk),]
plm = summary(lm(logPk ~ logk, data = dfP))
pRs = plm$r.squared
p_vec <- c(p_vec, pRs)
# Power Law
k = unique(PL)
pk = c()
for(ki in k){
pk <- c(pk, sum(PL %in% ki) )
}
logk = log(k); logPk = log(pk)
dfLP = data.frame(logk, logPk)[!is.infinite(logk),]
pLlm = summary(lm(logPk ~ logk), data = dfLP)
pPLs = pLlm$r.squared
pl_vec <- c(pl_vec, pPLs)
}
print(paste0("Mean of Power Law: ",round(mean(pl_vec), 2)))
## [1] "Mean of Power Law: 0.87"
print(paste0("Mean of Poisson: ",round(mean(p_vec), 2)))
## [1] "Mean of Poisson: 0.6"
t.test(pl_vec, p_vec, alternative = "greater")
##
## Welch Two Sample t-test
##
## data: pl_vec and p_vec
## t = 87.223, df = 1491.5, p-value < 2.2e-16
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 0.2581761 Inf
## sample estimates:
## mean of x mean of y
## 0.8680735 0.6049321