Appendix I

Appendix I#

# First of all, some general python and eWaterCycle libraries need to be imported:
# general python
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

import numpy as np
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
from scipy.optimize import minimize

#niceties
from rich import print
import seaborn as sns
sns.set()

# general eWaterCycle
import ewatercycle
import ewatercycle.models
import ewatercycle.forcing
# As stated in chapter 3, eWaterCycle provides access to the Caravan dataset, from which a Camel dataset of the catchment of the Wien River is loaded.
camelsgb_id = "lamah_208082"
# calibration dates on 75% of experiment dates

experiment_start_date = "1981-08-01T00:00:00Z"
experiment_end_date = "2018-01-01T00:00:00Z"

calibration_start_time = experiment_start_date
calibration_end_time = "2007-08-31T00:00:00Z"

validation_start_time = "2007-09-01T00:00:00Z" 
validation_end_time = experiment_end_date
# Even though we don't use caravan data as forcing, we still call it forcing
# because it is generated using the forcing module of eWaterCycle
forcing_path_caravan = Path.home() / "forcing" / camelsgb_id / "caravan"
forcing_path_caravan.mkdir(exist_ok=True, parents=True)

# If someone has prepared forcing for you, this path needs to be changed to that location. 
prepared_forcing_path_caravan_central = Path("location/of/forcing/data")
# load data that you or someone else generated previously
camelsgb_forcing = ewatercycle.forcing.sources['CaravanForcing'].load(Path("/home/thirza/forcing/lamah_208082/caravan"))
# Above, it can be seen that the forcing data contains precipitation, potential evaporation, discharge and the near-surface temperatures (tas). 
# For this research, only the discharge data is relevant. The discharge data is loaded from the forcing below.

ds_forcing = xr.open_mfdataset([camelsgb_forcing['Q']])
#               Si,  Su, Sf, Ss, Sp
s_0 = np.array([0,  100,  0,  5, 0])
S_names = ["Interception storage", "Unsaturated Rootzone Storage", "Fastflow storage", "Slowflow storage", "Groundwater storage"]

p_names = ["$I_{max}$",  "$C_e$",  "$Su_{max}$", "β",  "$P_{max}$",  "$T_{lag}$",   "$K_f$",   "$K_s$", "FM"]
param_names = ["Imax","Ce",  "Sumax", "beta",  "Pmax",  "Tlag",   "Kf",   "Ks", "FM"]
result = [7.70574780e-07, 1.32351315e+00, 1.00047603e+02, 3.89427105e+00,
       6.66366529e-01, 4.30576783e-02, 1.00508560e+00, 1.94023052e+00,
       4.58706486e-01]
par = result
model = ewatercycle.models.HBVLocal(forcing=camelsgb_forcing)
config_file, _ = model.setup(parameters=par, initial_storage=s_0)
model.initialize(config_file)
Q_m = []
time = []
while model.time < model.end_time:
    model.update()
    Q_m.append(model.get_value("Q")[0])
    time.append(pd.Timestamp(model.time_as_datetime))
model.finalize()
model_output = pd.Series(data=Q_m, name="Modelled_discharge", index=time)
df = pd.DataFrame(model_output)
Q_pandas = ds_forcing["Q"].to_dataframe()
calibration_start_time = pd.to_datetime(calibration_start_time)
calibration_end_time = pd.to_datetime(calibration_end_time)

df.index = df.index.tz_localize(None)  # Remove timezone
Q_pandas.index = Q_pandas.index.tz_localize(None)
calibration_start_time = calibration_start_time.tz_localize(None)
calibration_end_time = calibration_end_time.tz_localize(None)

model_output_filtered1 = model_output.loc[calibration_start_time:calibration_end_time]
ds_forcing_filtered1 = ds_forcing["Q"].loc[calibration_start_time:calibration_end_time]
# Sort data from high to low
sorted_model_data = np.sort(model_output_filtered1)[::-1]  
sorted_obs_data = np.sort(ds_forcing_filtered1)[::-1]  

# Create figure and subplots
fig, axes = plt.subplots(1, 2, figsize=(10, 4), sharey=True)  

# Histogram for Model Data
axes[0].hist(sorted_model_data, bins=50, color='blue', edgecolor='black', alpha=0.7)
axes[0].set_title('Model Discharges')
axes[0].set_xlabel('Discharge (mm/day)')
axes[0].set_ylabel('Frequency')

# Histogram for Observed Data
axes[1].hist(sorted_obs_data, bins=50, color='red', edgecolor='black', alpha=0.7)
axes[1].set_title('Observed Discharges')
axes[1].set_xlabel('Discharge (mm/day)')

# Add a shared caption below the plots
fig.text(0.5, -0.05, 
         "Figure I1: Histogram of the observed and modelled discharges for the calibration period.\n"
         "It can be seen that the modelled discharges are similar distributed as the obsreved discharges. ",
         ha="center", fontsize=9)

fig.suptitle("Calibration Period", fontsize=12, fontweight='bold')


plt.tight_layout()  # Adjust layout to fit everything nicely
plt.show()
../../../../_images/942ced6dd32b6416748463af0c352bd08b84e87b4c30a336d4bf8e0967dc26b3.png
# Sort data from high to low and filter values >= 2 mm/day
sorted_model_data = np.sort(model_output_filtered1)[::-1]  
sorted_model_data = sorted_model_data[sorted_model_data >= 5]  

sorted_obs_data = np.sort(ds_forcing_filtered1)[::-1]  
sorted_obs_data = sorted_obs_data[sorted_obs_data >= 5]  

# Create figure and subplots
fig, axes = plt.subplots(1, 2, figsize=(10, 4), sharey=True)  

# Histogram for Model Data
axes[0].hist(sorted_model_data, bins=20, color='blue', edgecolor='black', alpha=0.7)
axes[0].set_title('Model Discharges (≥ 5 mm/day)')
axes[0].set_xlabel('Discharge (mm/day)')
axes[0].set_ylabel('Frequency')

# Histogram for Observed Data
axes[1].hist(sorted_obs_data, bins=20, color='red', edgecolor='black', alpha=0.7)
axes[1].set_title('Observed Discharges (≥ 5 mm/day)')
axes[1].set_xlabel('Discharge (mm/day)')

# Add a shared caption below the plots
fig.text(0.5, -0.1, 
         "Figure I2: Histogram of the observed and modelled discharges for the calibration period.\n"
         "Only values ≥ 5 mm/day are included to focus on relevant discharges.\n"
         "The distribution of the modelled and observed discharge are very similar",
         ha="center", fontsize=9)
fig.suptitle("Calibration Period", fontsize=12, fontweight='bold')
plt.tight_layout()  # Adjust layout to fit everything nicely
plt.show()
../../../../_images/7b55ae2cbce5e420727c2f7ed9a9c6f54854f486fea30c7e53b2ee80f94eb4ec.png
validation_start_time = pd.to_datetime(validation_start_time)
validation_end_time = pd.to_datetime(validation_end_time)

df.index = df.index.tz_localize(None)  # Remove timezone
Q_pandas.index = Q_pandas.index.tz_localize(None)
validation_start_time = validation_start_time.tz_localize(None)
validation_end_time = validation_end_time.tz_localize(None)

model_output_filtered2 = model_output.loc[validation_start_time:validation_end_time]
ds_forcing_filtered2 = ds_forcing["Q"].loc[validation_start_time:validation_end_time]
# Sort data from high to low
sorted_model_data = np.sort(model_output_filtered2)[::-1]  
sorted_obs_data = np.sort(ds_forcing_filtered2)[::-1]  

# Create figure and subplots
fig, axes = plt.subplots(1, 2, figsize=(10, 4), sharey=True)  

# Histogram for Model Data
axes[0].hist(sorted_model_data, bins=50, color='blue', edgecolor='black', alpha=0.7)
axes[0].set_title('Model Discharges')
axes[0].set_xlabel('Discharge (mm/day)')
axes[0].set_ylabel('Frequency')

# Histogram for Observed Data
axes[1].hist(sorted_obs_data, bins=50, color='red', edgecolor='black', alpha=0.7)
axes[1].set_title('Observed Discharges')
axes[1].set_xlabel('Discharge (mm/day)')

# Add a shared caption below the plots
fig.text(0.5, -0.05, 
         "Figure I3: Histogram of the observed and modelled discharges for the validation period.\n"
         "It can be seen that the model overestimates the amount of very small discharge values.",
         ha="center", fontsize=9)
fig.suptitle("Validation Period", fontsize=12, fontweight='bold')
plt.tight_layout()  # Adjust layout to fit everything nicely
plt.show()
../../../../_images/b94af01f2ba2434d8f2c2426077c6bcc3784aa2b41d65bb6810c9c1a78a266fa.png
# Sort data from high to low and filter values >= 2 mm/day
sorted_model_data = np.sort(model_output_filtered2)[::-1]  
sorted_model_data = sorted_model_data[sorted_model_data >= 5]  

sorted_obs_data = np.sort(ds_forcing_filtered2)[::-1]  
sorted_obs_data = sorted_obs_data[sorted_obs_data >= 5]  

# Create figure and subplots
fig, axes = plt.subplots(1, 2, figsize=(10, 4), sharey=True)  

# Histogram for Model Data
axes[0].hist(sorted_model_data, bins=20, color='blue', edgecolor='black', alpha=0.7)
axes[0].set_title('Model Discharges (≥ 5 mm/day)')
axes[0].set_xlabel('Discharge (mm/day)')
axes[0].set_ylabel('Frequency')

# Histogram for Observed Data
axes[1].hist(sorted_obs_data, bins=20, color='red', edgecolor='black', alpha=0.7)
axes[1].set_title('Observed Discharges (≥ 5 mm/day)')
axes[1].set_xlabel('Discharge (mm/day)')

# Add a shared caption below the plots
fig.text(0.5, -0.1, 
         "Figure I4: Histogram of the observed and modelled discharges for the validation period.\n"
         "Only values ≥ 5 mm/day are included to focus on relevant discharges.\n"
         "It can be seen that the distribution of modelled discharge is very different than the one of the observed discharge",
         ha="center", fontsize=9)
fig.suptitle("Validation Period", fontsize=12, fontweight='bold')
plt.tight_layout()  # Adjust layout to fit everything nicely
plt.show()
../../../../_images/dda7887a92c39f8733cb4cdc42fcdcd259c5a35fc938a6fd303038e671fc8acf.png