## CAMBIARE DIRECTORY ----------------- ## CARICAMENTO DATI A<-read.table(file="dati_Campi_Flegrei.txt",header=TRUE) Piro <- A[1:19,1] B<-read.table(file="test_medicina.txt",header=TRUE) Medi <- B[,2] ----------------- ## VISUALIZZAZIONI DATI Dati <- Piro D <- sort(Dati) L <- length(Dati) F<- (1:L)/L - 0.5/L ## CDF plot(D,F, type="l") lines(c(D[1],D[L]),c(0,0),col="green") lines(c(D[1],D[L]),c(1,1),col="green") lines(c(0,0.01),c(0,1),col="green") ## S <- 1 - CDF S <- 1-F plot(D,S, type="l") lines(c(D[1],D[L]),c(0,0),col="green") lines(c(D[1],D[L]),c(1,1),col="green") lines(c(0,0.01),c(0,1),col="green") ## -LOG(S) PIU' RETTA DI REGRESSIONE SL <- -log(1-F) plot(D,SL, type="l") REG<-lm(SL~D) a <- REG$coefficient[2] b <- REG$coefficient[1] lines(c(D[1],D[L]),c((a*D[1]+b),(a*D[L]+b)),col="red") ## -LOG(S) PIU' RETTA DI REGRESSIONE SENZA OUTLIER SL <- -log(1-F) plot(D,SL, type="b") REG<-lm(SL[1:(L-1)]~D[1:(L-1)]) a <- REG$coefficient[2] b <- REG$coefficient[1] lines(c(D[1],D[L]),c((a*D[1]+b),(a*D[L]+b)),col="red") ----------------------------------- ## DATI MEDICINA Dati <- Medi D <- sort(Dati) L <- length(Dati) F<- (1:L)/L - 0.5/L ## CDF plot(D,F, type="l") lines(c(D[1],D[L]),c(0,0),col="green") lines(c(D[1],D[L]),c(1,1),col="green") lines(c(0,0.01),c(0,1),col="green") ## S <- 1 - CDF S <- 1-F plot(D,S, type="l") lines(c(D[1],D[L]),c(0,0),col="green") lines(c(D[1],D[L]),c(1,1),col="green") lines(c(0,0.01),c(0,1),col="green") ## -LOG(S) PIU' RETTA DI REGRESSIONE SL <- -log(1-F) plot(D,SL, type="l") REG<-lm(SL~D) a <- REG$coefficient[2] b <- REG$coefficient[1] lines(c(D[1],D[L]),c((a*D[1]+b),(a*D[L]+b)),col="red") ## LOGARITMO ITERATO SLL <- log(-log(1-F)) plot(D,SLL, type="l") ## LOGARITMO ITERATO in Y, LOG in X SLL <- log(-log(1-F)) DL <- log(D) plot(DL,SLL, type="l") plot(DL[10:L],SLL[10:L], type="l") ## SOLO CODA DESTRA ## LOGARITMO ITERATO in Y, LOG in X ## PIU' RETTA DI REGRESSIONE gap <-500 N <- L-gap SLL <- log(-log(1-F[N:L])) DL <- log(D[N:L]) plot(DL,SLL, type="l") REG<-lm(SLL~DL) a <- REG$coefficient[2] b <- REG$coefficient[1] lines(c(DL[1],DL[gap]),c((a*DL[1]+b),(a*DL[gap]+b)),col="red") start.gap <-500 end.gap <- 20 L0 <- L-start.gap L1 <- L-end.gap REG<-lm(log(-log(1-F[L0:L1]))~log(D[L0:L1])) a <- REG$coefficient[2] b <- REG$coefficient[1] gap <-100 N <- L-gap SLL <- log(-log(1-F[N:L])) DL <- log(D[N:L]) plot(DL,SLL, type="l") lines(c(DL[1],DL[gap]),c((a*DL[1]+b),(a*DL[gap]+b)),col="red") --------------------------------- --------------------------------- ## HS-FIT OF MEDI, WEIBULL COMPARISON Dati <- Medi D <- sort(Dati) L <- length(Dati) F<- (1:L)/L - 0.5/L start.gap <-500 end.gap <- 20 L0 <- L-start.gap L1 <- L-end.gap REG<-lm(log(-log(1-F[L0:L1]))~log(D[L0:L1])) a <- REG$coefficient[2] b <- REG$coefficient[1] gap <-100 N <- L-gap SLL <- log(-log(1-F[N:L])) DL <- log(D[N:L]) plot(DL,SLL, type="l") lines(c(DL[1],DL[gap]),c((a*DL[1]+b),(a*DL[gap]+b)),col="red") shape <- a scale <- exp(-b/a) c(shape,scale) require(MASS) Medi.plus <- D[4:length(D)] fitdistr(Medi.plus, "weibull") ------------------- ## Q-Q PLOT DELLA HS-WEIBULL e DELLA ML-WEIBULL Dati <- Medi a<-shape s<-scale quant <- function(x) {qweibull(x,a,s)} x<-(-0:700)/10 L <- length(Dati) F.hat<- (1:L)/L - 0.5/L Dati.ord<-sort(Dati) plot(x,x, type="l") q <- quant(F.hat) lines(Dati.ord,q, type="b") Dati <- Medi a<-3.58 s<-38.84 quant <- function(x) {qweibull(x,a,s)} x<-(-0:700)/10 L <- length(Dati) F.hat<- (1:L)/L - 0.5/L Dati.ord<-sort(Dati) plot(x,x, type="l") q <- quant(F.hat) lines(Dati.ord,q, type="b") --------------------------------- ## HS-FIT OF PIRO WITHOUT OUTLIER, Q-Q PLOT, ITERATED LOGARITHM require(MASS) Dati <- sort(Piro)[1:19] fitdistr(Dati, "weibull") Dati <- sort(Piro)[1:18] fitdistr(Dati, "weibull") a<-1.44 s<-28.25 quant <- function(x) {qweibull(x,a,s)} x<- 1:100 L <- length(Dati) F.hat<- (1:L)/L - 0.5/L Dati.ord<-sort(Dati) plot(x,x, type="l") q <- quant(F.hat) lines(Dati.ord,q, type="b") D <- sort(Piro)[1:18] L <- length(D) F<- (1:L)/L - 0.5/L SL <- -log(1-F) plot(D,SL, type="l") REG<-lm(SL~D) a <- REG$coefficient[2] b <- REG$coefficient[1] lines(c(D[1],D[L]),c((a*D[1]+b),(a*D[L]+b)),col="red") SLL <- log(-log(1-F)) DL <- log(D) plot(DL,SLL, type="l") -------------------------------------------- -------------------------------------------- ## MIXTURES K<-rbinom(10000,1, 0.7) X<-matrix(nrow = 10000) X0<-rnorm(10000,mean=0, sd=1) X1<-rnorm(10000,mean=5, sd=1) X<-K*X1+(1-K)*X0 hist(X,100) K<-rbinom(10000,1, 0.7) X<-matrix(nrow = 10000) X0<-rnorm(10000,mean=0, sd=1) X1<-rnorm(10000,mean=5, sd=1) X<-K*X1+(1-K)*X0 sort(X)[9500] -------------------------------------------- -------------------------------------------- ## MELANGE OF MEDI shape <- 3.78 scale <- 39.1 m <- mean(Medi) s <- sd(Medi) tmid <- 35 lambda <- 5 phi <- function(x) {(1/pi)*atan((x-tmid)/lambda)+0.5} F.plus <- function(x) {pweibull(x,shape,scale)} F.minus <- function(x) {pnorm(x,m,s)} Phi <- function(x) {phi(x)*F.plus(x)+(1-phi(x))*F.minus(x)} x<-(-0:700)/10 plot(x,Phi(x)) --- ## densità: plot(x[1:700],(Phi(x[2:701])-Phi(x[1:700]))) --- Dati <- Medi x<-(-0:700)/10 quant <- function(a) {x[which.min(abs(Phi(x)-a))]} L <- length(Dati) F.hat<- (1:L)/L - 0.5/L Dati.ord<-sort(Dati) plot(x,x, type="l") qq<-1:L for (k in 1:L) { qq[k] <- quant(F.hat[k]) } lines(Dati.ord,qq, type="b") ------------------------------ ## MIGLIORAMENTO CODA SINISTRA shape <- 3.78 scale <- 39.1 m <- mean(Medi) s <- sd(Medi)+1 tmid <- 25 lambda <- 5 phi <- function(x) {(1/pi)*atan((x-tmid)/lambda)+0.5} F.plus <- function(x) {pweibull(x,shape,scale)} F.minus <- function(x) {pnorm(x,m,s)} Phi <- function(x) {phi(x)*F.plus(x)+(1-phi(x))*F.minus(x)} Dati <- Medi x<-(-0:700)/10 quant <- function(a) {x[which.min(abs(Phi(x)-a))]} L <- length(Dati) F.hat<- (1:L)/L - 0.5/L Dati.ord<-sort(Dati) plot(x,x, type="l") qq<-1:L for (k in 1:L) { qq[k] <- quant(F.hat[k]) } lines(Dati.ord,qq, type="b") plot(x[1:700],(Phi(x[2:701])-Phi(x[1:700]))) --------------- ## CONFRONTO CON ML WEIBULL E ML NORMAL Dati <- Medi a<-3.58 s<-38.84 quant <- function(x) {qweibull(x,a,s)} x<-(-0:700)/10 L <- length(Dati) F.hat<- (1:L)/L - 0.5/L Dati.ord<-sort(Dati) plot(x,x, type="l") q <- quant(F.hat) lines(Dati.ord,q, type="b") Dati <- Medi m<-34.97 s<-11.06 quant <- function(x) {qnorm(x,m,s)} x<-(-0:700)/10 L <- length(Dati) F.hat<- (1:L)/L - 0.5/L Dati.ord<-sort(Dati) plot(x,x, type="l") q <- quant(F.hat) lines(Dati.ord,q, type="b") ------------------------------- ## RANDOM NUMBER GENERATION FROM MELANGE n <- 1500 U <- runif(n, min=0, max=1) shape <- 3.78 scale <- 39.1 m <- mean(Medi) s <- sd(Medi)+1 tmid <- 25 lambda <- 5 phi <- function(x) {(1/pi)*atan((x-tmid)/lambda)+0.5} F.plus <- function(x) {pweibull(x,shape,scale)} F.minus <- function(x) {pnorm(x,m,s)} Phi <- function(x) {phi(x)*F.plus(x)+(1-phi(x))*F.minus(x)} x<-(-0:700)/10 quant <- function(a) {x[which.min(abs(Phi(x)-a))]} X <- 1:n for (k in 1:n) { X[k] <- quant(U[k]) } hist(X,50,freq=FALSE) hist(Medi,50,freq=FALSE) -------------------- -------------------- -------------------- ## PARETO, GENERAL H <- function(x) {(sign(x)+1)/2} H.tm <- function(x,tm) {(sign(x-tm)+1)/2} pPareto <- function(t,tm,alp) {(1-(tm/t)^alp)*H.tm(t,tm)} alp<-2 tm<-1 t<- (1:500)/100 plot(t,pPareto(t,tm,alp)) dPareto <- function(t,tm,alp) {(alp/tm)*(tm/t)^(alp+1)*H.tm(t,tm)} t<- (1:500)/100 plot(t,dPareto(t,tm,alp)) qPareto <- function(a,tm,alp) {tm*(1-a)^(-1/alp)} --------------------------------- ## PARETO FIT X<-....... z<-(sd(X)/mean(X))^2 alp<- 2*z/(z-1) tm<-(mean(X)/2)*(1+1/z) ----------------------------------- ----------------------------------- -----------------------------------