#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu May  3 16:59:36 2018

@author: mirjam
"""

import numpy as np
import matplotlib.pyplot as plt

N = 10

#which_error = "normal"
#which_error = "uni"
which_error = "lognormal"
sigma = 0.5

x = np.linspace(0,20,N)
if(which_error == "normal"):
    epsilon = np.random.normal(size = N ) 
    y = x+epsilon

if(which_error == "uni"):    
    epsilon = np.random.uniform(low = -3**0.5, high = 3**0.5,size = N ) 
    y = x+epsilon

if(which_error == "lognormal"):
    epsilon = np.random.normal(size = N ) 
    y = np.exp(np.log(x)+sigma*epsilon)


def MLE_LR(x,y):
    nom = 0
    denom = 0
    for i in range(0, np.size(x)):
        denom += x[i]*x[i]
        nom += x[i]*y[i]
    return(nom/denom)
        
ahat = MLE_LR(x,y)
print(ahat)
ycalc = ahat*x

plt.figure("data_sim")
plt.plot(x,y,'o')
plt.plot(x,ycalc,'r', label = 'a=%f'%ahat)
plt.title('simulated data '+which_error)
plt.ylabel('y')
plt.xlabel('x')
plt.legend(loc='best')
plt.show()

def run_MLE(N,M):
    MLE_vals = np.empty(M)
    for i in range(0,M):
        x = np.linspace(0,20,N)
        if(which_error == "normal"):
            epsilon = np.random.normal(size = N) 
            y = x+epsilon
        if(which_error == "uni"):
            epsilon = np.random.uniform(low = -3**0.5, high = 3**0.5,size = N ) 
            y = x+epsilon
        if(which_error == "lognormal"):
            epsilon = np.random.normal(size = N ) 
            y = np.exp(np.log(x)+sigma*epsilon)
        MLE_vals[i] = MLE_LR(x,y)
    return(MLE_vals)

Nvals = np.array([2,5,10,50,100,500,1000])
a_mean = np.empty(np.size(Nvals))
a_var = np.empty(np.size(Nvals))

for i in range(0,np.size(Nvals)):
    a_run = run_MLE(Nvals[i],1000)
    a_mean[i] = np.mean(a_run)
    a_var[i] = np.var(a_run)

plt.figure("a_sim")
plt.subplot(121)
plt.semilogx(Nvals,a_mean,'o')
plt.title('mean a')
plt.ylabel('var a')
plt.xlabel('N')
plt.subplot(122)
plt.loglog(Nvals,a_var,'o')
plt.title('variance a')
plt.ylabel('var a')
plt.xlabel('N')
plt.show()

n = 1000

dist_2 = run_MLE(2,n)
dist_10 = run_MLE(10,n)
dist_100 = run_MLE(100,n)

plt.figure('Plot_distr')
plt.hist(dist_2,40,normed=True,range=[0.9,1.1],alpha=0.5, label="N=2")
plt.hist(dist_10,40,normed=True,range=[0.9,1.1],alpha=0.5, label="N=10")
plt.hist(dist_100,40,normed=True,range=[0.9,1.1],alpha=0.5, label="N=100")
plt.ylabel('Probability density P(x)')
plt.legend(loc='best')
plt.xlabel('x')
plt.show()

x = np.random.randn(n)
x.sort()
y = np.arange(0,n)/n
dist_2_cum = (dist_2 - np.mean(dist_2))/np.std(dist_2)
dist_2_cum.sort()
dist_10_cum = (dist_10 - np.mean(dist_10))/np.std(dist_10)
dist_10_cum.sort()
dist_100_cum = (dist_100 - np.mean(dist_100))/np.std(dist_100)
dist_100_cum.sort()

plt.figure("culmulative")
plt.plot(x,y,label = "gauss")
plt.plot(dist_2_cum,y, label="N=2")
plt.plot(dist_10_cum,y, label="N=10")
plt.plot(dist_100_cum,y, label="N=100")
plt.legend(loc='best')
plt.title('Cumulative density function of Gaussian distribution')
plt.ylabel('Percentage of area under curve up to this point')
plt.show()

plt.figure("qq")
plt.plot(dist_2_cum,x, label="N=2")
plt.plot(dist_10_cum,x, label="N=10")
plt.plot(dist_100_cum,x, label="N=100")
plt.legend(loc='best')
plt.title('QQ')
plt.xlabel('a estimator')
plt.ylabel('gauss')
plt.show()