## This file contains the companion codes for the statistical ## method described in the paper ## ## "Record events attribution in climate studies" (P.Naveau, J.Worms, 2020) ## ## submitted for publication in Environmetrics. ## Version : september 2020 ## ## Contact : Julien Worms (julien.worms@uvsq.fr) (University of Paris Saclay & Versailles) ## Requires the following functions : ## - npudist() (library np), ## - mle() (library (stats4) ## - ecdf() (library stats) ## - gmm() (library gmm) ## - WLK.test() (library EWGoF) library(stats4); library(gmm); library(stats); library(np); library(EWGoF) ## The main functions are described below (part A), while the other technical functions ## can be found in B of this same file. ## ## At the end of part A, one can find a function for generating simulated settings ## satisfying the W-class assumption described in the paper. ####################### ## A. Main functions ## ####################### ## The starting ingredient of all the code below is the couple of matrices matX and matZ ## described in the next function, having respectively m and n lines, the same number of columns, ## and containing respectively the counterfactual and factual datasets. GZestimation <- function(matX,matZ,methodGhatm="ecdfmodif",bandwth=0,b=0.05){ ## This function massively computes the values ## \hat{G}_m (Z_i) ## for a series of factual and counterfactual datasets. ## ## The output of this function is the input for the important function weibullGMMestim(). ## ## Input : ## - matX : a matrix whose columns are the counterfactual data samples X ## - matZ : a matrix whose columns are the factual data samples Z ## Important : ## > matX and matZ must have the same number of columns, this number being for instance ## the number of gridpoints of the studied area (1 column=1 gridpoint) ## They are allowed to be simple vectors (1 column each) ## > matX and matZ can have (and in general have) a different number of lines ## (the counterfactual and factual data sizes noted m and n in the paper) ## ## - methodGhatm : indicates the method used for computing the nonparametric estimate ## of the counterfactual cdf \hat{G}_m ## > "npudist" : uses the kernel routine npudist() from the library "np" ## > "ecdf" : the usual empirical cdf is computed ## > "ecdfmodif" : a small variation of the usual empirical cdf ## is considered, so that it is never equal to 0 nor 1 ; ## a parameter b, between 0 and 1, is required ## (by default, b=0.05, see formula (4) in the paper) ## ## - bandwth : an optional parameter used in the case the npudist() function is used ## (if set to 0, then a data-driven bandwidth will be used in npudist(), and ## otherwise the specified bandwidth parameter will be used, which will very ## strongly diminish the computing time) ## - b : an optional parameter, used in the case methodGhatm="ecdfmodif" ## ## Output : ## - if m and n denote the number of lines of matX and matZ, and N their ## number of columns, the output is a n x N matrix matGm , ## which j-th column contains the n values \hat{G}_m(Z_ij) , ## where Z_ij are the n elements of the j-th column of matZ, ## and \hat{G}_m is the estimate of the cdf G of X, computed ## from the elements of the j-th column of matX ## ## Requires : functions npudist() and ecdf() from the np and stats packages ## ## Rem : with methodGhatm="npudist", it may happen that some values are 1 or 0 ## when the factual and counterfactual datasets are very different matX <- as.matrix(matX) matZ <- as.matrix(matZ) dimnsZ=dim(matZ) matGm <- matrix(rep(0,prod(dimnsZ)),nrow=dimnsZ[1],ncol=dimnsZ[2]) for (j in 1:dimnsZ[2]){ X <- matX[,j] Z <- matZ[,j] if (methodGhatm=="npudist"){ if (bandwth == 0){ matGm[,j] <- npudist(tdat=X,edat=Z)$dist } else { matGm[,j] <- npudist(tdat=X,edat=Z,bws=bandwth)$dist } } if (methodGhatm=="ecdf"){ matGm[,j] <- ecdf(X)(Z) } if (methodGhatm=="ecdfmodif"){ matGm_temp <- ecdf(X)(Z) m <- nrow(matX) matGm[,j] <- (m*matGm_temp+b)/(m+1) } } return(matGm) } weibullGMMestim <- function(matGm,truevalues=NULL){ ## This function massively computes the estimates of lambda and k ## from the output of the function GZestimation(), via M-estimation ## (see equation (8) in the paper) ## ## Input : ## - matGm : a matrix issued from GZestimation(), containing the ## values \hat{G}m(Z_ij) used for computing \hat{p}_12 and \hat{p}_13 ## - truevalues : an optional matrix, which can contain appropriate ## starting values for the GMM estimation process. ## For instance, in a simulation setting, this optional matrix could ## contain the true underlying values of the Weibull parameters lambda and k. ## (this matrix should contain the lambda values in the first row, and ## the k values in the second row ; the number of columns must be same as that of matGm) ## ## Output : ## - a list of 2 elements, the first one ($lambdahat) containing the GMM estimate ## of the scale lambda parameter for each column of matGm (ie of matX and matZ), ## and the second one ($khat) containing the estimate of the shape k shape parameter. ## ## Requires : the gmm() routine from the gmm package , and the utilitary function functiong() ## ## Rem : in gmm(), the "nlminb" optimization routine is used (instead of "optim"), ## due to the fact that the target parameters must be positive. matGm <- as.matrix(matGm) Ncol <- dim(matGm)[2] lambdahat.vec <- rep(0,Ncol) khat.vec <- rep(0,Ncol) if ( is.null(truevalues) ){ startvalues <- matrix(rep(1,2*Ncol),nrow=2,ncol=Ncol) # starting values, if not precised, are set to 1 and 1 for lambda and k } else { startvalues <- truevalues } for (j in 1:Ncol){ #cat(j,":") GnhatZ = matGm[,j] EGMMweibull <- gmm(g=functiong , x=GnhatZ , t0=startvalues[,j], optfct = "nlminb", lower=c(10^(-8),10^(-8)),upper=c(Inf,Inf), onlyCoefficients = TRUE ) lambdahat.vec[j] <- EGMMweibull$coefficients[[1]] khat.vec[j] <- EGMMweibull$coefficients[[2]] #cat(" ",round(lambdahat.vec[j],4),",") #cat(round(khat.vec[j],4),"(GMM)\n") } return( list("lambdahat"=lambdahat.vec,"khat"=khat.vec) ) } p1rfarW <- function(lam.vec,k.vec,r.vec,lowerbnd=10^(-5)){ ## This vectorialized function simply computes the parametric estimates of p_1r and far(r) ## via formula (7) in the paper, from a given set of values of lambda, k, and r ## ## This function computes no associated variances, see p1rfarW_var() below for this purpose. ## ## Input : ## - vectors of same sizes containing values of lambda, k, r ## (in practice, these values are estimates of lambda and k issued from weibullGMMestim()) ## - an optional technical parameter, lowerbnd (see laplaceWeibull() for some details) ## ## Output : ## - if the triplets of values (lambda,k,r) contain estimates of lambda and k, ## then the ouptut is the list of associated couples of values ## ( \hat{p}^(W)_1r , \hat{far}^(W)(r) ) ## - if the triplets of values (lambda,k,r) contain true values of lambda and k ## (chosen for instance in a simulation session), then the output is the list ## of associated couples of values ( p_1r , far(r) ) ## ## Requires : laplaceWeibull() m=length(lam.vec) p1r.vec=rep(0,m) for (j in 1:m){ p1r.vec[j] <- laplaceWeibull(r.vec[j]-1,lam.vec[j],k.vec[j],lowerbnd=lowerbnd) } far.vec <- 1 - 1 / (r.vec * p1r.vec) return( list("p1r"=p1r.vec,"far"=far.vec) ) } p1rfarW_var <- function(lam.vec,k.vec,r.vec,a=1,p1rvec=NULL,debugg=FALSE){ ## This (vectorialized) function computes the asymptotic variance (up to sqrt(n)) ## of the parametric GMM estimator \hat{p}^(W)_1r of p_1r , and of the ## corresponding estimator of \hat{far}^(W)(r) of far(r) = 1 - 1/(r*p_1r). ## ## The outputs can be used to construct tests or confidence intervals for p_1r or far(r). ## ## Input : ## - vectors of same sizes containing values of lambda, k, r ## (in practice, these values are estimates of lambda and k issued from weibullGMMestim()) ## - the value a containing the ratio sqrt(n/m) (by default, a=1) ## - optionally, the parameter p1rvec containing values of \hat{p}^(W)_1r ## which have been previously computed (via p1rfarW()), in order to save computing time ; ## by default, p1rvec=NULL means that \hat{p}^(W)_1r will be recomputed ## ## Output : ## - a list containing couples of values, the estimated asymptotic variances of ## sqrt(n)( \hat{p}^(W)_1r - p_1r ) and of sqrt(n)( \hat{far}^(W)(r) - far(r) ). ## ## Requires : matcovNn(), jacobianFunctiong12(), jacobianFunctiongrminus1() ## and possibly also p1rfarW(). lvec <- length(lam.vec) variancesp1r <- rep(0,lvec) variancesfar <- rep(0,lvec) if (debugg) {cat("Step matcovNn\n")} covNn <- matcovNn(lam.vec,k.vec,a=a) if (debugg) {cat("Step jacobg12\n")} jac12 <- jacobianFunctiong12(lam.vec,k.vec) if (debugg) {cat("Step jacobgrmoins1\n")} jacrm1 <- jacobianFunctiongrminus1(lam.vec,k.vec,r.vec) if (is.null(p1rvec)){ p1rvec <- p1rfarW(lam.vec,k.vec,r.vec)$p1r } for (i in 1:lvec){ jac12inv <- solve(jac12[[i]]) variancesp1r[i] <- jacrm1[[i]] %*% jac12inv %*% covNn[[i]] %*% t(jac12inv) %*% t(jacrm1[[i]]) variancesfar[i] <- variancesp1r[i] / (r.vec[i]*(p1rvec[i]^2))^2 } return(list("varp1r"=variancesp1r,"varfar"=variancesfar)) } p1rfar_NPparamestim <- function(lam.vec,k.vec,r.vec,a=1,lowerbnd=10^(-5)){ ## This function computes the parametric estimates of the different ## components of the asymptotic variance of the non-parametric estimator \hat{p}_1r of p_1r. ## With the notations of the paper, it thus computes the parametric estimates ## of tau^2_r and sigma^2_r in proposition 1. ## ## For a NON-parametric estimate of those variances, use function estimparamsNP() instead. ## ## Input : ## - vectors of same sizes containing values of lambda, k, r ## (in practice, these values are estimates of lambda and k issued from weibullGMMestim()) ## - the value a containing the ratio sqrt(n/m) (by default, a=1) ## - an optional technical parameter, lowerbnd (see laplaceWeibull() for some details) ## ## Output : ## - a list containing (in this order) \hat{p}^(W)_1r and the parametric ## estimates of tau^2_r, sigma^2_r, and (just for reference) M_r - (\hat{p}^(W)_1r)^2 ## for every triplet of values (lambda,k,r) provided as inputs ## ## Requires : laplaceWeibull() and Mrminusp1r2func() m=length(lam.vec) p1r = rep(0,m) ; p2rmoins1 = p1r ; Mrminusp1r2 = p1r for (j in 1:m){ p1r[j] <- laplaceWeibull(r.vec[j]-1,lam.vec[j],k.vec[j],lowerbnd=lowerbnd) p2rmoins1[j] <- laplaceWeibull(2*r.vec[j]-2,lam.vec[j],k.vec[j],lowerbnd=lowerbnd) Mrminusp1r2[j] <- Mrminusp1r2func(r.vec[j],lam.vec[j],k.vec[j],lowerbnd=lowerbnd) } tau2r <- p2rmoins1 - p1r^2 sigma2r <- tau2r + a*(r.vec-1)^2 * Mrminusp1r2 # here the factor a intervenes return(list("p1r"=p1r,"tau2r"=tau2r,"sigma2r"=sigma2r,"Mrminusp1r2"=Mrminusp1r2)) } p1rfar_NPestim <- function(matGm,r.vec,a=1){ ## This function computes the non-parametric estimates of the different ## components of the asymptotic variance of the non-parametric estimator \hat{p}_1r of p_1r. ## With the notations of the paper, it thus computes the parametric estimates ## of tau^2_r and sigma^2_r in proposition 1. ## ## For a parametric estimate of those variances, use function p1rfar_NPparamestim() instead. ## ## Input : ## - matGm : a matrix issued from GZestimation(), containing the values \hat{G}m(Z_ij) ## - r.vec : a vectors of size the number of columns of matGm, containing the ## values of r of interest (in practice, r.vec is often a repetition of the same value) ## - the value a containing the ratio sqrt(n/m) (by default, a=1) ## ## Output : ## - a list containing the following non-parametric estimates (in the notations of the paper) ## > of p_1r ("$p1rhat" field, i.e. \hat{p}_1r in the notations of the paper) ## > of p_{1,2r-1} ("$p12rrm1hat" field) ## > of tau^2_r ("$tau2rhat" field) ## > of M_r ("$Mrhat" field) ## > of sigma^2_r ("$sigma2rhat" field, the estimate of the asymptotic variance of \hat{p}_1r, ## see the formula following Proposition 1), ## i.e. a 5-tuple element for every given value of r in the input r.vec ## ## Requires : nothing, but uses function outer() matGm <- as.matrix(matGm) Ncol <- dim(matGm)[2] nn <- dim(matGm)[1] p1r <- rep(0,Ncol) p12rm1 <- p1r ; Mr <- p1r ; tau2r <- p1r ; sigma2r <- p1r mat.r <- matrix(rep(r.vec,nn),nrow=nn,ncol=Ncol,byrow=TRUE) Grmoins2 = matGm^(mat.r - 2) Grmoins1 = Grmoins2 * matGm p1r = apply(Grmoins1,2,mean) p12rm1 = apply(Grmoins1*Grmoins1,2,mean) tau2r = p12rm1 - p1r^2 for (j in 1:Ncol){ GnZ <- matGm[,j] Grm2 <- Grmoins2[,j] productsGrm2 <- outer(Grm2,Grm2) # by default, the product is applied # and productsGrm2 is thus a nn x nn matrix, containing the Gn(Zi)^(r-2)*Gn(Zl)^(r-2) minimaG <- outer(GnZ,GnZ,pmin) # contains the min(Gn(Zi),Gn(Zl)), and is also nn x nn # (rem : pmin is indeed required, not simply min !) matMr <- productsGrm2 * minimaG Mr[j] <- mean(matMr) # this is indeed the mean of the nn^2 values of the matrix matMr } sigma2r = tau2r + a*(r.vec-1)^2 * (Mr - p1r^2) # here comes up the value a, in practice the ratio sqrt(n/m) return( list("p1rhat"=p1r,"p12rm1hat"=p12rm1,"tau2rhat"=tau2r, "Mrhat"=Mr,"sigma2rhat"=sigma2r) ) } simulWclass <- function(m,n,N,ksiX,ksiZ,sigX,supportauto=TRUE,muX=0,muZ=0,sigZ=0,graph=TRUE){ # This function produces simulations of sample matrices matX and matZ # (which will be given as inputs of function GZestimation()) # of GEV distributions which satisfy the W-class assumption # (i.e. guaranteeing the equal support assumption), or not. # # Input : # - mandatory input parameters are m,n,N,ksiX,ksiZ,sigX # where m and n are the counterfactual and factual datasizes, # N is the size of the simulation (number of samples produced), # ksiX and ksiZ are the shape parameters, and sigX the scale counterfactual parameter # - the scale factual parameter sigZ is optional : # if not provided, then it will be set equal to sigX # - if supportauto=TRUE, then the supports of F and G will be forced to be identical # - the other optional parameters refer to the more general setting described # in Proposition 6 of the paper (in the appendix), allowing for instance different support sets. # (in particular, if supportauto=FALSE, specifying the lower/upper endpoints muX and muZ is mandatory) # # Output : # - a plot representing the factual (in red) and counterfactual (in black) densities (except if graph=FALSE) # - a list containing 4 elements # > the counterfactual datasets matrix ($matX), # > the factual datasets matrix ($matZ) # > the value of the scale parameter lambda ($lam) of the associated Weibull distribution # > the value of the shape parameter k ($k) of the associated Weibull distribution if (sigZ == 0) { sigZ = sigX } if (supportauto == TRUE){ muZ <- muX + sigZ/ksiZ - sigX/ksiX } matX <- matrix(rep(0,m*N),nrow=m,ncol=N) matZ <- matrix(rep(0,n*N),nrow=n,ncol=N) for (j in 1:N){ matX[,j] = muX + (sigX/ksiX)*( (-log(runif(m)))^(-ksiX) - 1 ) matZ[,j] = muZ + (sigZ/ksiZ)*( (-log(runif(n)))^(-ksiZ) - 1 ) } if (graph==TRUE){ plot(density(matX[,1]), main="Kernel density estimates \nfor the first X sample (black, counterfactual),\n and the first Z sample (red, factual)", cex.main=0.8, xlim=c(-4,12)) lines(density(matZ[,1]),col="red") } return(list("matX"=matX, "matZ"=matZ, "lam"=((sigX/ksiX)/(sigZ/ksiZ))^(1/ksiX), "k"=ksiX/ksiZ) ) } farrecord <- function(X,Z,r.vec,weibtest=F,alpha=0.05,plot=F){ # This function deals with a single couple of counterfactual and factual # datasets, and provides the statistical estimates, standard errors, # and confidence intervals, for a set of values of r given as input. # # It is intended for a quick implementation of the methods described # in the paper, for a single framework. For a larger scale study, with # several (to hundreds of) datasets, and also several values of r, # it is advised to proceed step by step by using the functions described # in the rest of this section (GZestimation(),weibullGMMestim(),etc...) # # Input : # - X and Z, the single counterfactual and factual datasets (vectors) # - r.vec : a vector containing the desired values of r for the study # - alpha : the parameter tuning the level of the confidence interval (0.05 by default) # - weibtest= : if TRUE, output will include the p-value of the WLK # weibullity test according to the procedure described # in the paper. Default is FALSE, since this part of the # code is the longest to proceed. # - plots= : if T, will provide two plots, of \hat{p}^(W)_1r and \hat{far}^(W)(r) # as a function of r, with confidence bounds appearing. Default is F. # - includenp : default F, if T then will include the nonparametric estimates in the plot as well # # Output : # - a list containing different elements, all of which (except $weibtest.pvalue) # being vectors or matrices of same height as the input r.vec, one row # for each value of r in r.vec : # Base checks if (is.matrix(X)){ if (ncol(X)>1){ stop("X must be a vector or a 1 column matrix") } } if (is.matrix(Z)){ if (ncol(Z)>1){ stop("Z must be a vector or a 1 column matrix") } } X=as.vector(X); Z=as.vector(Z) if (is.numeric(X)==F | is.numeric(Z)==F){ stop("Either X or Z is not fully numeric.") } if ( is.null(r.vec) ){ stop("A non-null value for r.vec must be provided.") } if (is.numeric(r.vec)==F | sum(r.vec <= 0) > 0){ stop("Either r.vec is not numeric or it contains negative values.") } # Base computations vecGm <- GZestimation(X,Z) #cat("vecGm : ",round(vecGm,2),"\n") m=length(X); n=length(Z) lr <- length(r.vec) thetahat <- weibullGMMestim(vecGm) lam.vec <- rep(thetahat$lambdahat,lr) k.vec <- rep(thetahat$khat,lr) a=sqrt(n/m) # Standard Errors and Confidence intervals p1rfarparam <- p1rfarW(lam.vec,k.vec,r.vec) p1rhat <- p1rfarparam$p1r ; farhat <- p1rfarparam$far resparam <- p1rfarW_var(lam.vec,k.vec,r.vec,a,p1rhat) stdp1rW <- sqrt(resparam$varp1r) stdfarW <- sqrt(resparam$varfar) zalpha <- qnorm(1-alpha/2) lowerbndp1r <- p1rhat*exp(-(zalpha*stdp1rW)/sqrt(n*p1rhat^2)) upperbndp1r <- p1rhat*exp(+(zalpha*stdp1rW)/sqrt(n*p1rhat^2)) lowerbndfar <- farhat-zalpha*stdfarW/sqrt(n) upperbndfar <- farhat+zalpha*stdfarW/sqrt(n) # Nonparametric approach matGm <- matrix( vecGm, ncol=lr, nrow=n, byrow=F) resnonparam <- p1rfar_NPestim(matGm,r.vec,a) p1rhatnp <- resnonparam$p1rhat farhatnp <- 1 - 1/(r.vec*p1rhatnp) stdp1rnp <- sqrt(resnonparam$sigma2rhat) stdfarnp <- stdp1rnp/(r.vec*p1rhatnp^2) lowerbndp1rnp <- p1rhatnp*exp(-(zalpha*stdp1rnp)/sqrt(n*p1rhatnp^2)) upperbndp1rnp <- p1rhatnp*exp(+(zalpha*stdp1rnp)/sqrt(n*p1rhatnp^2)) lowerbndfarnp <- farhatnp-zalpha*stdfarnp/sqrt(n) upperbndfarnp <- farhatnp+zalpha*stdfarnp/sqrt(n) # Pseudo weibulls and Weibullity test if (weibtest){ pseudoweibull <- -log(vecGm) suppressWarnings( WLKstat <- WLK.test(pseudoweibull,nsim=2)$statistic ) # the option nsim is only useful for computing a p-value # but since we compute it ourselves below, we do not keep # the default value of nsim=200, which uselessly lengthen # the execution time. nsimMC=500 WLKsimvalues <- rep(0,nsimMC) for (i in 1:nsimMC){ unifsamp <- runif(m) weibsamp <- rweibull(n,shape=thetahat$khat,scale=thetahat$lambdahat) weibhatsamp <- -log( GZestimation(unifsamp,exp(-weibsamp),method="ecdfmodif",b=0.05) ) suppressWarnings( WLKsimvalues[i] <- WLK.test(weibhatsamp,nsim=2)$statistic ) #cat(round(WLKsimvalues[i],2),", ") } #cat("WLKstat :",WLKstat,"\n") weibtestpvalue <- mean( WLKsimvalues >= WLKstat ) #cat("weibtestpvalue :",weibtestpvalue,"\n") } # Optional plots if (plot){ plot(r.vec,p1rhat,type="l",lty=1,lwd=2,col="red", main="p1r parametric estimates (in red)\n with confidence bounds (in black)\n(in blue, the counterfactual p0r=1/r)", cex.main=0.8, ylim=c(0,1)) points(r.vec,p1rhat,pch=20,lwd=2,col="red") points(r.vec,1/(r.vec),pch=3,lwd=2,col="blue") points(r.vec,upperbndp1r,pch=18,lwd=1,col="black") points(r.vec,lowerbndp1r,pch=18,lwd=1,col="black") lines(r.vec,upperbndp1r,pch=18,lty=2,lwd=1,col="black") lines(r.vec,lowerbndp1r,pch=18,lwd=1,lty=2,col="black") #lines(r.vec,upperbndp1rnp,pch=18,lty=2,lwd=1,col="green") #lines(r.vec,lowerbndp1rnp,pch=18,lwd=1,lty=2,col="green") plot(r.vec,farhat,type="l",lty=1,lwd=2,col="red", main="far(r) parametric estimates (in red)\n with confidence bounds (in black)", cex.main=0.8, ylim=c(0,1)) points(r.vec,farhat,pch=20,lwd=2,col="red") points(r.vec,upperbndfar,pch=18,lwd=1,col="black") points(r.vec,lowerbndfar,pch=18,lwd=1,col="black") lines(r.vec,upperbndfar,pch=18,lty=2,lwd=1,col="black") lines(r.vec,lowerbndfar,pch=18,lwd=1,lty=2,col="black") #lines(r.vec,upperbndfarnp,pch=18,lty=2,lwd=1,col="green") #lines(r.vec,lowerbndfarnp,pch=18,lwd=1,lty=2,col="green") } if (weibtest){ return(list("p1rhat"=p1rhat, "p1rhatnp"=p1rhatnp, "farhat"=farhat, "farhatnp"=farhatnp, "r"=r.vec, "weibtestpvalue"=weibtestpvalue, "stdp1rW"=stdp1rW, "stdp1rnp"=stdp1rnp, "confintp1rW"=cbind(lowerbndp1r,upperbndp1r), "confintp1rnp"=cbind(lowerbndp1rnp,upperbndp1rnp), "confintfarW"=cbind(lowerbndfar,upperbndfar), "confintfarnp"=cbind(lowerbndfarnp,upperbndfarnp) ) ) } else { return(list("p1rhat"=p1rhat, "p1rhatnp"=p1rhatnp, "farhat"=farhat, "farhatnp"=farhatnp, "r"=r.vec, "stdp1rW"=stdp1rW, "stdp1rnp"=stdp1rnp, "confintp1rW"=cbind(lowerbndp1r,upperbndp1r), "confintp1rnp"=cbind(lowerbndp1rnp,upperbndp1rnp), "confintfarW"=cbind(lowerbndfar,upperbndfar), "confintfarnp"=cbind(lowerbndfarnp,upperbndfarnp) ) ) } } ###################################### ## B. Technical Utilitary functions ## ###################################### ## Of all the technical functions below, the user can be essentially interested ## in laplaceWeibull() (computing the basic element E(G(Z)^j)), ## and possibly matcovNn(), jacobianFunctiong12(), and jacobianFunctiongrminus1() ## for handling variance estimates of (\hat{lambda},\hat{k}) for instance. funcLaplace <- function(x,m,lam,k,a){ # Utilitary function for use in the integrate() statement # in function laplaceWeibull() defined below (1/a) * exp( -(m*lam/a^(1/k))*(-log(x))^(1/k) ) * x^(1/a - 1) } laplaceWeibull <- function(j,lambda,k,lowerbnd=10^(-6),upperbnd=1,fac=1,tol=10^(-5)){ # By default (i.e. when upperbnd=1), this basic but crucial function computes E(exp(-j*W)) # for any given integer j, where W is a Weibull(lambda,k) variable. # # The computation is based on integration rather than partial power series, # which has proved particularly unreliable in our context. # # Input : # - single values of j, lambda and k (this function is NOT vectorialized) # (in practice, lambda and k are estimates of lambda and k issued from weibullGMMestim()) # - other optional parameters controlling the way the numerical integration is conducted # (the integral in question is the one in formula (7) in the paper) # > lowerbnd indicates the lower bound from which we will ask R to integrate # (the value 0 is not allowed, but it must be sufficiently small for the # evaluation to be valuable) ; by default, lowerbnd=10^(-6). # > upperbnd is 1 by default : this upper bound will be set to some other # value when laplaceWeibull() will be called upon in function Mrminusp1r2() for instance. # > the parameter tol indicates the desired precision for the numerical approximation of the integral. # > the parameter fac is a tuning parameter used for improving accuracy (see paper) # # Output : # - the numerical evaluation of E(exp(-j*W)) when W~Weibull(lambda,k) # # Used in function : many functions in this file # Requires : funcLaplace() # # Example illustrating that this function works : ## > integrate(funcLaplace,lower=10^(-8),upper=1,m=10,lam=0.3,k=1.3,a=1) ## 0.2049931 with absolute error < 9.9e-05 ## > laplaceWeibull(10,lam=0.3,k=1.3) ## [1] 0.2049932 # vala=fac*(j*lambda)^k upperbndmodif=upperbnd^vala lowerbndmodif=upperbndmodif*10^(-5) I <- integrate(f=funcLaplace, lower=lowerbndmodif, upper=upperbndmodif, subdivisions=1000L, rel.tol=tol, m=j,lam=lambda,k=k,a=vala, stop.on.error = FALSE) resultat <- I$value return(resultat) } functiong <- function(theta,vecx){ # A utilitary function required for the M-estimation # of (lambda,k) in function weibullGMMestim() # # Input : # - theta : a vector of size 2 containing the tentative values of lambda and k # - vecx : a vector of size n supposed to contain the values \hat{G}_m(Z_i) # Output : # - a nx2 matrix which first and second columns respectively contain the values # \hat{G}_m(Z_i) - p_12(lambda,k) and (\hat{G}_m(Z_i))^2 - p_13(lambda,k) # # Used in : weibullGMMestim() # Requires : laplaceWeibull() lambdaval=theta[1] ; kval = theta[2] p12val <- laplaceWeibull(j=1,lambda=lambdaval,k=kval,lowerbnd=10^(-8)) p13val <- laplaceWeibull(j=2,lambda=lambdaval,k=kval,lowerbnd=10^(-8)) M <- cbind( vecx - p12val , vecx^2 - p13val ) return(M) } funcLaplacebis <- function(v,m,lam,k,a,lowerbnd=10^(-5),tol=10^(-5)){ # Utilitary function, similar to funcLaplace() # # Used in : Mrminusp1r2func() # Requires : laplaceWeibull() # # (Caution : the vector v must contain no 0 value) nv=length(v) mprime = m*lam / a^(1/k) lamprime = lam / a^(1/k) facteur1 = exp( -mprime*(-log(v))^(1/k) ) facteur2 = exp(lamprime*(-log(v))^(1/k)) - 1 facteur3=rep(0,nv) for (i in 1:nv){ facteur3[i] = laplaceWeibull( j=m, lambda=lam, k=k, lowerbnd=(10^-6)*v[i], upperbnd= v[i]^(1/a), tol=tol ) # borninf garanteed to be both low and lower than v[i] } # the output is a vector facteur4=v^(1/a - 1) return( (2/a)*facteur1*facteur2*facteur3*facteur4 ) } Mrminusp1r2func <- function(r,lambda,k,lowerbnd=10^(-5),fac=0.5,tol=10^(-5)){ # This function computes M_r - p_{1,r}^2 for a series of values # of r, lambda, and k given as input (vectors), which amounts to compute # M_r - (p_1r)^2 # = E(e^(-(r-1)(W1+W2)+min(W1,W2)) - (p_1r)^2 # = 2*E( exp(-(r-1)(W1+W2)) * (exp(W2)-1) indicator(W1>W2) ) # i.e. computation of some "separated variables" integral # Details provided in the doc or the proofs. # # As in function laplaceWeibull(), numerical issues must be tackled # (in the computation of facteur3 in funcLaplacebis for instance) # # Used in : p1rfar_NPparamestim() # Requires : funcLaplace(), LaplaceWeibull(), funcLaplacebis() # # Rem: when k=1, we know the exact value of Mr-p1r^2 : it is # lambda / ( (1+(r-1)*lambda)^2 * (2+(2*r-3)*lambda) ) # vala=fac*((r-1)*lambda)^k I <- integrate(f=funcLaplacebis, lower=lowerbnd,upper=1, subdivisions=1000L, rel.tol=10^(-10), m=r-1, lam=lambda, k=k, a=vala, lowerbnd=lowerbnd, stop.on.error = FALSE) return(I$value) } funcLaplaceter <- function(v,m,lam,k,a,lowerbnd=10^(-5),tol=10^(-5)){ # Utilitary function used inside function Mrfunc() defined below. nv=length(v) mprime = (m-1)*lam / a^(1/k) facteur1 = exp( -mprime*(-log(v))^(1/k) ) facteur2=rep(0,nv) for (i in 1:nv){ facteur2[i] = laplaceWeibull( j=m, lambda=lam, k=k, lowerbnd=(10^-6)*v[i], upperbnd= v[i]^(1/a), tol=tol ) # borninf is guaranteed to be both close to 0, AND < v[i] } facteur3=v^(1/a - 1) return((2/a)*facteur1*facteur2*facteur3) } Mrfunc <- function(r,lambda,k,lowerbnd=10^(-5),fac=0.5,tol=10^(-5)){ # This function computes the value # M_r = E( G(Z1)^(r-2).G(Z2)^(r-2).min(G(Z1),G(Z2)) ) # which is part of the asymptotic variance of the nonparametric # estimator of p_1r, but also participates to the asymptotic # variance of the parametric estimator \hat{p}^(W)_1r of p_1r. # # Inputs are single values of r, lambda, k : it is not vectorialized. # # Used in : matcovNn() (itself required by p1rfarW_var()) # Requires : funcLaplaceter() vala=fac*((r-1)*lambda)^k I <- integrate(f=funcLaplaceter, lower=lowerbnd,upper=1, subdivisions=1000L, rel.tol=10^(-5), m=r-1, lam=lambda, k=k, a=vala, lowerbnd=lowerbnd, stop.on.error = FALSE) return(I$value) } foncdgjoverdlambda <- function(u,j,lam,k,a){ # Utilitary function used inside dgjoverdlambdafunc() (-j/a^((1/k)+1)) * (-log(u))^(1/k) * exp( -(j*lam/a^(1/k))*(-log(u))^(1/k) ) * u^(1/a - 1) } dgjoverdlambdafunc <- function(j,lambda,k,lowerbnd=10^(-6),fac=0.5){ # This utilitary function computes the partial derivative w.r.t. lambda # of the expectation E( exp(-j*W) ) where W is Weibull(lambda,k). # This function is needed for computing a variance in a delta-method. # # Used in : jacobianFunctiong12(), jacobianFunctiongrminus1() # Requires : foncdgjoverdlambda() vala=fac*(j*lambda)^k I <- integrate(f=foncdgjoverdlambda,lower=lowerbnd,upper=1,subdivisions=1000L, j=j,lam=lambda,k=k,a=vala, stop.on.error = FALSE) return(I$value) } foncdgjoverdk <- function(u,j,lam,k,a){ # Utilitary function used inside dgjoverdkfunc() (-lam/k^2) * log( (1/a)*(-log(u)) ) * foncdgjoverdlambda(u,j,lam,k,a) } dgjoverdkfunc <- function(j,lambda,k,lowerbnd=10^(-6),fac=1){ # This utilitary function computes the partial derivative w.r.t. k # of the expectation E( exp(-j*W) ) where W is Weibull(lambda,k). # This function is needed for computing a variance in a delta-method. # # Used in : jacobianFunctiong12(), jacobianFunctiongrminus1() # Requires : foncdgjoverdk() vala=fac*(j*lambda)^k I <- integrate(f=foncdgjoverdk,lower=lowerbnd,upper=1,subdivisions=1000L, j=j,lam=lambda,k=k,a=vala,stop.on.error = FALSE) return(I$value) } foncpartiedeEG1minG1G2 <- function(x,lam,k,a){ # Utilitary function used inside function partiedeRG1minG1G2func() (1/a) * x^( 2/a - 1) * exp( -(2*lam/a^(1/k)) * (-log(x))^(1/k) ) } partiedeEG1minG1G2func <- function(lambda,k,lowerbnd=10^(-6),fac=0.5){ # This function computes a part of E(G(Z1)*min(G(Z1),G(Z2))), # an expression appearing inside the limit covariance matrix of (\hat{p}_12,\hat{p}_13). # It is used inside function matcovNn(). vala=fac*(2*lambda)^k I <- integrate(f=foncpartiedeEG1minG1G2,lower=lowerbnd,upper=1,subdivisions=1000L, lam=lambda,k=k,a=vala, stop.on.error = FALSE) return(I$value) } matcovNn <- function(lam.vec,k.vec,a=1){ ## This vectorialized routine computes the matrix Sigma_a in the paper, ## asymptotic covariance matrix of ## ## N_n = sqrt(n)( (\hat{p}_12,\hat{p}_13) - (p_12,p_13) ) ## ## where the value a is the limit of sqrt(n/m) as n,m\to\infty ## ## Input : ## - vectors of same sizes containing values of lambda and k ## (in practice, these values are estimates of lambda and k issued from weibullGMMestim()) ## - the value a containing the ratio sqrt(n/m) (by default, a=1) ## ## Output : ## - a list containing the 2x2 asymptotic covariance matrices described above ## ## Used in : p1rfarW_var() ## Requires : laplaceWeibull(), Mrfunc(), partiedeEG1minG1G2() lvec <- length(lam.vec) listematcov <- list() for (i in 1:lvec){ lambda <- lam.vec[i] ; k <- k.vec[i] p12 <- laplaceWeibull(1,lambda,k) p13 <- laplaceWeibull(2,lambda,k) p14 <- laplaceWeibull(3,lambda,k) p15 <- laplaceWeibull(4,lambda,k) M2 <- Mrfunc(2,lambda,k) M3 <- Mrfunc(3,lambda,k) EG1minG1G2 <- (1/2)*p12^2 + p13 - partiedeEG1minG1G2func(lambda,k) varphat1 <- a*M2 + p13 - (1+a)*p12^2 varphat2 <- 4*a*M3 + p15 - (1+4*a)*p13^2 covphat1et2 <- 2*a*EG1minG1G2 + p14 - (1+2*a)*p12*p13 matcov <- matrix(c(varphat1,covphat1et2,covphat1et2,varphat2),2,2) listematcov[[i]] <- matcov } return(listematcov) } jacobianFunctiong12 <- function(lam.vec,k.vec,debugg=FALSE){ # This vectorialized function computes the (2x2) jacobian matrices of function # g : (lambda,k) -> # ( g1(lambda,k), g2(lambda,k) ) = ( E(G(Z)) , E(G^2(Z)) ) # at the values (lambda,k) given as inputs. # # Such a jacobian is needed to compute the asymptotic covariance matrix # of the M-estimators (\hat{lambda},\hat{k}) , via a delta-method starting # from the estimators (\hat{p}_12,\hat{p}_13). It will then be useful for evaluating # the asymptotic variance of the parametric estimator \hat{p}^(W)_1r of p_1r. # # Input : # - vectors of same sizes containing values of lambda and k # (in practice, these values are estimates of lambda and k issued from weibullGMMestim()) # # Output : # - a list containing the 2x2 jacobian matrices described above # # Used : p1rfarW_var() # Requires : dgjoverdlambdafunc(), dgjoverdkfunc() lvec <- length(lam.vec) listejacobiennes <- list() for (i in 1:lvec){ lambda <- lam.vec[i] ; k <- k.vec[i] dg1surdlambda <- dgjoverdlambdafunc(1,lambda,k) if (debugg){ cat("i=",i,": dg1dlam,") } dg1surdk <- dgjoverdkfunc(1,lambda,k) if (debugg){ cat("dg1dk,")} dg2surdlambda <- dgjoverdlambdafunc(2,lambda,k) if (debugg){ cat("dg2dlam,")} dg2surdk <- dgjoverdkfunc(2,lambda,k) if (debugg){ cat("dg2dk\n")} listejacobiennes[[i]] <- matrix(c(dg1surdlambda,dg1surdk,dg2surdlambda,dg2surdk), 2,2,byrow=TRUE) } return(listejacobiennes) } jacobianFunctiongrminus1 <- function(lam.vec,k.vec,r.vec){ # This vectorialized function computes the jacobian matrices (of dimension 1x2) # of the function g_{r-1} : (lambda,k) -> g_{r-1}(lambda,k) = E( G^(r-1)(Z) ) # at the values (lambda,k) given as inputs. # # It allows the passage from the asymptotic normality of (\hat{lambda},\hat{k}) # to the one of the parametric estimator \hat{p}^(W)_1r. # # Input : # - vectors of same sizes containing values of lambda, k, r # (in practice, these values are estimates of lambda and k issued from weibullGMMestim()) # # Output : # - a list containing the jacobian 1x2 matrices described above # # Used in : p1rfarW_var() # Requires : dgjoverdlambdafunc(), dgjoverdkfunc() lvec <- length(lam.vec) listejacobiennes <- list() for (i in 1:lvec){ lambda <- lam.vec[i] ; k <- k.vec[i] ; r <- r.vec[i] dgoverdlambda <- dgjoverdlambdafunc(r-1,lambda,k) dgoverdk <- dgjoverdkfunc(r-1,lambda,k) listejacobiennes[[i]] <- matrix(c(dgoverdlambda,dgoverdk),nrow=1,ncol=2) } return(listejacobiennes) }