---
title: "Examples for ddst package"
author: "Przemyslaw Biecek"
date: "July 26, 2019"
output: 
  html_document:
    toc: true
    toc_depth: 2
vignette: >
  %\VignetteIndexEntry{Examples for ddst package}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include=FALSE}
# Set the default chunk option `echo = TRUE` for all chunks in this document.
knitr::opts_chunk$set(echo = TRUE)
```

# Data Driven Smooth Tests with ddst package

This document presents the tests available in the `ddst` package.

```{r, warning=FALSE, message=FALSE}
library("ddst")
library("polynom")
```

# II. Data Driven Smooth Tests for Selected Goodness-of-Fit Problems

```{r}
```

## 1. Data Driven Smooth Test for Uniformity

```{r}
# Fix the RNG seed so the simulated samples below are reproducible.
set.seed(7)

# Case 1 -- H0 is true: 80 draws from runif().
z <- runif(80)
t <- ddst.uniform.test(
  z,
  compute.p = TRUE,
  compute.cv = TRUE,
  d.n = 10
)
t
plot(t)

# Case 2 -- H0 is false: 80 draws from rbeta() with shapes 4 and 2.
z <- rbeta(80, 4, 2)
t <- ddst.uniform.test(
  z,
  compute.p = TRUE,
  compute.cv = TRUE,
  d.n = 10
)
t
t$p.value
plot(t)
```

## 2. Data Driven Smooth Test for Exponentiality

```{r}
# Fix the RNG seed so the simulated samples below are reproducible.
set.seed(7)

# Case 1 -- H0 is true: 80 draws from rexp() with rate 4.
z <- rexp(80, 4)
t <- ddst.exp.test(z, compute.p = TRUE, d.n = 10)
t
plot(t)

# Case 2 -- H0 is false: 80 draws from rchisq() with 4 degrees of freedom.
z <- rchisq(80, 4)
t <- ddst.exp.test(z, compute.p = TRUE, d.n = 10)
t
t$p.value
plot(t)
```

## 3. Data Driven Smooth Tests for Normality


### 3.1. Bounded Basis Functions

```{r}
# Fix the RNG seed so the simulated samples below are reproducible.
set.seed(7)

# Number of coordinates passed to every test call in this chunk.
d.n <- 10

# Case 1 -- H0 is true: 100 draws from rnorm().
z <- rnorm(100)
t <- ddst.normbounded.test(z, compute.p = TRUE, d.n = d.n)
t
plot(t)

# Case 2 -- H0 is false: 100 draws from rexp() with rate 1.
z <- rexp(100, 1)
t <- ddst.normbounded.test(z, compute.p = TRUE, d.n = d.n)
t
plot(t)

# Case 3 -- the Tephra data.
z <- c(
  -1.748789, -1.75753, -1.740102, -1.740102, -1.731467, -1.765523,
  -1.761521, -1.72522, -1.80371, -1.745624, -1.872957, -1.729121,
  -1.81529, -1.888637, -1.887761, -1.881645, -1.91518, -1.849769,
  -1.755141, -1.665687, -1.764721, -1.736171, -1.736956, -1.737742,
  -1.687537, -1.804534, -1.790593, -1.808661, -1.784081, -1.729903,
  -1.711263, -1.748789, -1.772755, -1.72756, -1.71358, -1.821116,
  -1.839588, -1.839588, -1.830321, -1.807835, -1.747206, -1.788147,
  -1.759923, -1.786519, -1.726779, -1.738528, -1.754345, -1.781646,
  -1.641949, -1.755936, -1.775175, -1.736956, -1.705103, -1.743255,
  -1.82613, -1.826967, -1.780025, -1.684504, -1.751168
)
# Use the same `d.n` argument name as the two calls above (was `Dmax`).
t <- ddst.normbounded.test(z, compute.p = TRUE, d.n = d.n)
t
plot(t)
```

### 3.2. Unbounded Basis Functions


```{r}
# Fix the RNG seed so the simulated samples below are reproducible.
set.seed(7)

# Number of coordinates used for both the bias corrections and the tests.
d.n <- 10

# Case 1 -- H0 is true: 100 draws from rnorm().
z <- rnorm(100)

# Calculate the finite sample corrections;
# see 6.2. Composite null hypothesis H in the appendix materials.
e.v <- ddst.normunbounded.bias(n = length(z), d.n = d.n)
e.v

# Simulated 1-alpha quantiles, s(n, alpha);
# see Table 1 in the JSCS article.
s.n.alpha <- 4.4
r.alpha <- 2.708

t <- ddst.normubounded.test(z, d.n, e.v$e.0, e.v$v.0, r.alpha, s.n.alpha)
t
plot(t)

# Case 2 -- H0 is false: 100 draws from rexp() with rate 1.
# The sample length is again n = 100, so the corrections in `e.v` are reused.
z <- rexp(100, 1)

t <- ddst.normubounded.test(z, d.n, e.v$e.0, e.v$v.0, r.alpha, s.n.alpha)
t
plot(t)

# Case 3 -- the Tephra data.
z <- c(
  -1.748789, -1.75753, -1.740102, -1.740102, -1.731467, -1.765523,
  -1.761521, -1.72522, -1.80371, -1.745624, -1.872957, -1.729121,
  -1.81529, -1.888637, -1.887761, -1.881645, -1.91518, -1.849769,
  -1.755141, -1.665687, -1.764721, -1.736171, -1.736956, -1.737742,
  -1.687537, -1.804534, -1.790593, -1.808661, -1.784081, -1.729903,
  -1.711263, -1.748789, -1.772755, -1.72756, -1.71358, -1.821116,
  -1.839588, -1.839588, -1.830321, -1.807835, -1.747206, -1.788147,
  -1.759923, -1.786519, -1.726779, -1.738528, -1.754345, -1.781646,
  -1.641949, -1.755936, -1.775175, -1.736956, -1.705103, -1.743255,
  -1.82613, -1.826967, -1.780025, -1.684504, -1.751168
)

# Recalculate the finite sample corrections for this sample length, passing
# the same d.n that the test call below receives.
e.v <- ddst.normunbounded.bias(n = length(z), d.n = d.n)
e.v

# Simulated 1-alpha quantiles, s(n, alpha) and s.o(n, alpha);
# see Table 1 in the JSCS article.
s.n.alpha <- 3.3
r.alpha <- 2.142

t <- ddst.normubounded.test(z, d.n, e.v$e.0, e.v$v.0, r.alpha, s.n.alpha)
t

plot(t)
```


## 4. Data Driven Smooth Test for Extreme Value Distribution


```{r}
# `qgumbel()` below is used after attaching the evd package.
library("evd")

# Fix the RNG seed so the simulated samples below are reproducible.
set.seed(7)

# Case 1 -- H0 is true: negated qgumbel() transform of 100 uniform draws.
x <- -qgumbel(runif(100), -1, 1)
t <- ddst.evd.test(x, compute.p = TRUE, d.n = 10)
t
plot(t)

# Case 2 -- H0 is false: 80 draws from rexp() with rate 4.
x <- rexp(80, 4)
t <- ddst.evd.test(x, compute.p = TRUE, d.n = 10)
t
plot(t)
```

# III. Nonparametric Data Driven Smooth Tests for Comparing Distributions


## 5. Data Driven Smooth Test for k-Sample Problem

```{r}
# Fix the RNG seed so the simulated samples below are reproducible.
set.seed(7)
# H0 is false
# Two samples: 80 draws from runif() versus 80 draws from rexp() with rate 1.
x <- runif(80)
y <- rexp(80, 1)
t <- ddst.twosample.test(x, y, compute.p = TRUE)
t
plot(t)

# H0 is false
# Three samples passed as a list: runif(), rexp() with rate 1, and runif().
x <- runif(80)
y <- rexp(80, 1)
z <- runif(80)
t <- ddst.ksample.test(list(x, y, z))
t
plot(t)

# H0 is true
# Three samples passed as a list, each 80 draws from rnorm().
x <- rnorm(80)
y <- rnorm(80)
z <- rnorm(80)
t <- ddst.ksample.test(list(x, y, z))
t
plot(t)
```

## 6. Data Driven Test for Stochastic Ordering

```{r}
# Fix the RNG seed so the simulated samples below are reproducible.
set.seed(7)
# rpareto() and rlaplace() below are used after attaching rmutil;
# conflict warnings on attach are switched off.
library("rmutil", warn.conflicts = FALSE)
# H0 is false
# 1. Pareto(1)/Pareto(1.5)
# NOTE(review): the label says Pareto(1), but x is drawn with
# rpareto(50, 2, 2) -- confirm which one is intended.
x <- rpareto(50, 2, 2)
y <- rpareto(50, 1.5, 1.5)
t <- ddst.forstochdom.test(x, y, t = 2.2, K.N = 4)
t
plot(t)

# H0 is false
# 2. Laplace(0,1)/Laplace(1,25)
x <- rlaplace(50, 0, 1)
y <- rlaplace(50, 1, 25)
t <- ddst.forstochdom.test(x, y, t = 2.2, K.N = 4)
t
plot(t)

# H0 is true
# 3. LN(0.85,0.6)/LN(1.2,0.2)
x <- rlnorm(50, 0.85, 0.6)
y <- rlnorm(50, 1.2, 0.2)
t <- ddst.forstochdom.test(x, y, t = 2.2, K.N = 4)
t
plot(t)
```

## 7. Two-Sample Test Against Stochastic Dominance

```{r, warning=FALSE, message=FALSE}
# Fix the RNG seed so the simulated samples below are reproducible.
set.seed(7)
# H0 is true
# Both samples are 80 draws from runif().
x <- runif(80)
y <- runif(80)
t <- ddst.againststochdom.test(x, y, alpha = 0.05, t = 2.2, k.N = 4)
t
plot(t)

# H0 is false
# x is 80 draws from runif(); y is 80 draws from rbeta() with shapes 4 and 2.
x <- runif(80)
y <- rbeta(80, 4, 2)
t <- ddst.againststochdom.test(x, y, alpha = 0.05, t = 2.2, k.N = 4)
t
plot(t)
```

## 8. Data Driven Smooth Test for Upward Trend Alternative in k samples

```{r}
# Fix the RNG seed so the simulated samples below are reproducible.
set.seed(7)

# H0 is true
# Three uniform samples shifted by 0, 0.2 and 0.4.
x <- runif(80)
y <- runif(80) + 0.2
z <- runif(80) + 0.4
t <- ddst.upwardtrend.test(
  list(x, y, z),
  t.p = 2.2,
  t.n = 2.2
)
t
plot(t)

# H0 is false
# Four normal samples shifted by 0, 2, 4 and 3.
x1 <- rnorm(80)
x2 <- rnorm(80) + 2
x3 <- rnorm(80) + 4
x4 <- rnorm(80) + 3
t <- ddst.upwardtrend.test(
  list(x1, x2, x3, x4),
  t.p = 2.2,
  t.n = 2.2
)
t
plot(t)
```

## 9. Data Driven Smooth Test for Umbrella Alternatives in k samples

```{r}
# Fix the RNG seed so the simulated samples below are reproducible.
set.seed(7)

# H0 is true
# Three uniform samples; only the middle one is shifted (by 0.2), and the
# peak position is given as p = 2.
x <- runif(80)
y <- runif(80) + 0.2
z <- runif(80)
t <- ddst.umbrellaknownp.test(
  list(x, y, z),
  p = 2,
  t.p = 2.2,
  t.n = 2.2
)
t
plot(t)

# H0 is true
# Seven normal samples shifted by 0, 2, 4, 3, 2, 1 and 0; the peak position
# is given as p = 3.
x1 <- rnorm(80)
x2 <- rnorm(80) + 2
x3 <- rnorm(80) + 4
x4 <- rnorm(80) + 3
x5 <- rnorm(80) + 2
x6 <- rnorm(80) + 1
x7 <- rnorm(80)
t <- ddst.umbrellaknownp.test(
  list(x1, x2, x3, x4, x5, x6, x7),
  p = 3,
  t.p = 2.2,
  t.n = 2.2
)
t
plot(t)

# The same seven samples, now with the peak position given as p = 5.
t <- ddst.umbrellaknownp.test(
  list(x1, x2, x3, x4, x5, x6, x7),
  p = 5,
  t.p = 2.2,
  t.n = 2.2
)
t
plot(t)
```

### Unknown peak

```{r}
# Seven normal samples following a true umbrella pattern:
# the shifts are 0, 2, 4, 3, 2, 1 and 0.
x1 <- rnorm(80)
x2 <- rnorm(80) + 2
x3 <- rnorm(80) + 4
x4 <- rnorm(80) + 3
x5 <- rnorm(80) + 2
x6 <- rnorm(80) + 1
x7 <- rnorm(80)

# The peak is unknown, so run the known-peak test once for every candidate
# position 1..7 and collect the `statistic` element of each result.
statistics <- sapply(1:7, function(peak) {
  ddst.umbrellaknownp.test(
    list(x1, x2, x3, x4, x5, x6, x7),
    p = peak,
    t.p = 2.2,
    t.n = 2.2
  )$statistic
})

# Show all seven statistics and then the largest of them.
statistics
max(statistics)
```