# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

print("Exercise 1: i")
# Draw from Cauchy build-in:
n=100000
c1 = np.random.standard_cauchy(size=n)
c2 = np.random.randn(n)/np.random.randn(n)

bins = np.linspace(-10, 10, 100)
plt.figure("Cauchy distribution")
plt.subplot(121)
#depending on your python/matplot version you have to use 'normed' instad of 'density'
plt.hist(c1,bins, density = True)
plt.title("build-in Cauchy")
plt.ylabel('Probability density P(x)')
plt.xlabel('x')
plt.subplot(122)
plt.hist(c2, bins, density = True)
plt.title("normal/normal")
plt.xlabel('x')
    
plt.show()

input("Press Enter to continue...")

print("Exercise 1: ii")
n = 1000
y = np.arange(0,n)/n

# Draw from Gaussian:
x = np.random.randn(n)
x.sort()


# Now get N sums of M numbers drawn from cauchy
def draw_cauchy(N,M):
    xjs = [sum(np.random.standard_cauchy(size=M)) for i in range(0,N)]
    xjs = (xjs - np.mean(xjs))/np.std(xjs)
    xjs.sort()
    return(xjs)


# Let's try for M=2
#xc = draw_cauchy(n,2)
#plt.figure("Cumulative Cauchy Single")
#plt.plot(xc,y,'ro')
#plt.show()
#plt.title('Cumulative density functions')
#plt.ylabel('Percentage of area under curve up to x')
#plt.xlabel('x')

plt.figure("Cumulative Cauchy")
# Now for different values of M:
for m in [1,5,10,20,50]:
    xi = draw_cauchy(n,m)
    plt.subplot(121)
    plt.plot(xi, y, label="m=%d"%(m,))
    plt.subplot(122)
    plt.plot(xi, x, label="m=%d"%(m,))
    plt.title('qq-plot')
    plt.ylabel('Gauss')
    plt.xlabel('Cauchy')
    

# add Gauss results from first part for comparison    

plt.subplot(121)
plt.plot(x, y, label="gauss")
plt.title('Cumulative density functions')
plt.ylabel('Percentage of area under curve up to x')
plt.xlabel('x')
leg = plt.legend(loc='best',ncol = 1, shadow=False, fancybox=True)
leg.get_frame().set_alpha(0.5)
plt.show()

input("Press Enter to continue...")

print("Exercise 1: iii-iv")

def get_gauss_mean(N,M):
    xjs = [np.mean(np.random.randn(N)) for i in range(0,M)]
    var = np.var(xjs)
    return(var)

def get_cauchy_mean(N,M):
    xjs = [np.mean(np.random.standard_cauchy(size=N)) for i in range(0,M)]
    var = np.var(xjs)
    return(var)

nvals = range(1,100)
varg = [get_gauss_mean(n,100) for n in nvals]
varc = [get_cauchy_mean(n,100) for n in nvals]
    
plt.figure("Variance")
plt.subplot(121)
plt.semilogy(nvals,varg,'bo')
plt.ylabel('variance of empirical mean')
plt.xlabel('N')
plt.title("Gauss")
plt.subplot(122)
plt.semilogy(nvals,varc,'ro')
plt.title("Cauchy")
plt.xlabel('N')
plt.show()

input("Press Enter to continue...")
##############################################################################

print("Exercise 2: i")

n_realizations = np.array([2,5,10,20,50,200,1000,10000])
n_numberDist = 1000
N_bars = np.size(n_realizations)
used_variance = 2
used_mean = 5

tmp_matrix = np.zeros([2,N_bars])

for irel in range(0,N_bars):
    tmp_draw = np.array(np.random.randn(n_realizations[irel],n_numberDist)*used_variance+used_mean)
    tmp_matrix[0,irel] = np.mean(np.sum((tmp_draw-np.mean(tmp_draw,axis=0))**2,axis=0)/n_realizations[irel])
    tmp_matrix[1,irel] = np.mean(np.sum((tmp_draw-np.mean(tmp_draw,axis=0))**2,axis=0)/(n_realizations[irel]-1) ) 
     

width = 0.35
ind = np.arange(N_bars)
plt.figure()
#depending on your python/matplot version you have to delete the 'tick_label' option
p1 = plt.bar(ind,tmp_matrix[0,:]-used_variance**2,width, color='b', label = "1/N", tick_label = n_realizations)
p2 = plt.bar(ind+width,tmp_matrix[1,:]-used_variance**2,width, color='g', label = "1/(N-1)", tick_label = n_realizations)
plt.title("Bessel correction")
plt.ylabel("var_est - var_true")
plt.xlabel("Index (M)")
plt.show()

input("Press Enter to continue...")
print("Exercise 2: ii")

num_rep = np.array([3,5,10,20,50,200])

for irel in range(0,np.size(num_rep)):
    plt.figure(irel+2)
    tmp_draw = np.array(np.random.randn(num_rep[irel],n_numberDist)+used_mean)
    emp_var = np.sum((tmp_draw - used_mean)**2,axis=0)
    x_distr = np.linspace(num_rep[irel]/10,num_rep[irel]*4,100)
    y_chi2 = stats.chi2.pdf(x_distr,num_rep[irel])
    y_norm = stats.norm.pdf(x_distr,num_rep[irel],np.sqrt(2*num_rep[irel]))
    
    plt.hist(emp_var,50,density=True, facecolor='b', alpha=0.6, label = "empirical variance")
    plt.plot(x_distr,y_chi2,'g',lw=2,label = "chi2")
    plt.plot(x_distr,y_norm,'r',lw=2,label = "gauss")
    plt.show()