# -*- coding: utf-8 -*- """ Created on Thur Jan 10 2019 @author: Adrien Vergne """ ############################################################################### # # # GRAIN SIZE TOOLS # # # ############################################################################### # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> # Adrien VERGNE - Irstea Lyon - 2019 # Program Python 3.5 # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> # This script is designed for loading PSD data from Malvern Mastersizer # Excek file output, as well as for fitting Gausian mixture models to # the measured PSD ########################### Loading libraries ######################### import numpy as np # Numpy (array and matrix computing) import pandas as pd # DataFrames from math import sqrt, pi from scipy.interpolate import Akima1DInterpolator # an interpolator from sklearn.mixture import GaussianMixture # Gaussian mixture model (demodulation) import matplotlib.pyplot as plt # matplotlib scientific plotting ########################### Loading Malvern PSD data ######################### class load_malvern: """This class loads Malvern particle size data from an Excel file""" def __init__(self, folder, file): # Excel file self.file_folder = folder self.file_name = file self.file_path = folder + '/' + file # Data self.raw_data = None self.size_inf = None self.size_sup = None self.size_center = None self.size_proba = None self.D10 = 0 self.D50 = 0 self.D90 = 0 # Loading data self.load() def load(self): self.raw_data = pd.read_excel(self.file_path, 0) self.size_inf = np.array(self.raw_data['Size x'][1:]) * 1e-6 / 2 self.size_sup = np.array(self.raw_data['Size y'][1:]) * 1e-6 / 2 self.size_center = (self.size_inf + self.size_sup)/2 self.size_proba = np.array(self.raw_data['% In Size'][1:])/100 self.D10 = self.raw_data['µm Diameter'][2] self.D50 = self.raw_data['µm Diameter'][5] self.D90 = self.raw_data['µm Diameter'][8] ########################### Fitting GMM ######################### def mix_gaussian_model(x, m_list, s_list, w_list): """This function computes a sum of gaussians""" res = np.zeros(np.shape(x)) for i in range(len(m_list)): res += w_list[i] * (1 / sqrt(2 * pi * s_list[i]**2) * np.exp(-(x - m_list[i])**2 / (2 * s_list[i]**2))) return res def simple_gaussian(x, m, s, w): """This function computes a simple guaussian""" res = w * (1 / sqrt(2 * pi * s**2) * np.exp(-(x - m)**2 / (2 * s**2))) return res class demodul_granulo: """This class demodulates a grain size distribution INPUTS: @size_classes: center of each size class (radius in meters) @proba_cumul_vol: cumulative volume probability of each size class @interpolate_min_size: start (i.e. minimum size) of cumulative volume probability interpolation (radius in meters) @interpolate_max_size: end (i.e. maximum size) of cumulative volume probability interpolation (radius in meters) """ def __init__(self, size_classes, proba_cumul_vol, interpolate_min_size, interpolate_max_size): ###### Data containers ###### # ~~~~~~~~ Inputs ~~~~~~~~~~# # size classes (radius in meters) self.size_classes = size_classes # volume cumulative probability (from 0 to 1) self.proba_cumul_vol = proba_cumul_vol # start (i.e. minimum size) of cumulative volume probability interpolation self.interpolate_min_size = interpolate_min_size # end (i.e. maximum size) of cumulative volume probability interpolation self.interpolate_max_size = interpolate_max_size # ~~~~~~~~ Outputs ~~~~~~~~~~# # Resampled size classes in logscale self.sizes_log_resampled = np.array([]) # Interpolated cumulative probability self.cumul_interpolated = np.array([]) # Size class volume probability (no cumul) from resampled size classes self.resampled_proba_vol = np.array([]) # Size class number probability (no cumul) from resampled size classes self.resampled_proba_num = np.array([]) # List of Gaussian mixture models (from 1 to 10 modes) self.gaussian_mixture_model_list = [] # List of outputs data for each model (from 1 to 10 modes) --> see class "model_XX_modes_values" self.demodul_data_list = [] # Computing self.interpolate_data() self.demodulate() class model_XX_modes_values: """This sub-class is a data container for the outputs of a Gaussian Mixture Models of modes""" def __init__(self, x, nb_modes, mu_list, sigma_list, w_list): # Number of modes self.nb_modes = nb_modes # List of mode centers (mu) self.mu_list = mu_list # List of mode scalling parameters (sigma) self.sigma_list = sigma_list # List of mode weights (w) self.w_list = w_list # Gaussian Mixture model values self.model_total = mix_gaussian_model(x, self.mu_list, self.sigma_list, self.w_list) / \ np.sum(mix_gaussian_model(x, self.mu_list, self.sigma_list, self.w_list)) # list of single modes values self.modes_list = [] for i in range(self.nb_modes): # Computing the mode values self.modes_list.append(simple_gaussian(x, self.mu_list[i], self.sigma_list[i], self.w_list[i]) / np.sum(mix_gaussian_model(x, self.mu_list, self.sigma_list, self.w_list))) def interpolate_data(self): """This method interpolates the cumulative probability in logscale, between the specified min and max size""" # Phi steps log_scale = 0.005 # Computing maximum phi (minimum diameter) log_min = np.log(self.interpolate_min_size) # Computing minimum phi (maximum diameter) log_max = np.log(self.interpolate_max_size) # Creating an array of equally spaced phi self.sizes_log_resampled = np.arange(log_min, log_max, log_scale) # Akima interpolation f_logsize = Akima1DInterpolator(np.log(self.size_classes), self.proba_cumul_vol) self.cumul_interpolated = f_logsize(self.sizes_log_resampled) # Computing volume and number probability from interpolated data self.resampled_proba_vol = np.abs(np.diff(self.cumul_interpolated)) # np.abs is added because probability p # must be positive in np.random.choice # used in function below "demodulate" # Computing number probability, ss = np.sum(self.resampled_proba_vol /(np.exp(self.sizes_log_resampled[:-1]))**3) self.resampled_proba_num = (self.resampled_proba_vol / (np.exp(self.sizes_log_resampled[:-1]))**3) / ss # fig, ax = plt.subplots(1, 2) # ax[0].plot(list(range(len(self.cumul_interpolated))), self.cumul_interpolated) # ax[0].set_xlabel("n° point") # ax[0].set_ylabel("cumul interpolated") # # ax[1].plot(list(range(len(self.cumul_interpolated)-1)), self.resampled_proba_vol) # ax[1].set_xlabel("n° point") # ax[1].set_ylabel("diff cumul interpolated") # # plt.show() def demodulate(self): """This function demodulates the interpolated probability distribution""" # Sampling the size classes. Need to normalize the probability to get sum = 1.0 # print("**************************") # for i in range(self.resampled_proba_vol.shape[0]): # print(f"vol = {self.resampled_proba_vol[i]}, sum = {np.sum(self.resampled_proba_vol)}, " # f" p = {self.resampled_proba_vol[i] / np.sum(self.resampled_proba_vol)}") # print("**************************") ech_demod = np.random.choice(self.sizes_log_resampled[:-1], (10000,1), p=(self.resampled_proba_vol / np.sum(self.resampled_proba_vol))) # fitting models from 1 to 10 components N = np.arange(1, 11) self.gaussian_mixture_model_list = [None for i in range(len(N))] for i in range(len(N)): self.gaussian_mixture_model_list[i] = GaussianMixture(n_components=N[i], covariance_type='full').fit(ech_demod) mu_list = [m[0] for m in self.gaussian_mixture_model_list[i].means_] s_list = [sqrt(s[0][0]) for s in self.gaussian_mixture_model_list[i].covariances_] w_list = [w for w in self.gaussian_mixture_model_list[i].weights_] self.demodul_data_list.append(self.model_XX_modes_values(self.sizes_log_resampled, i + 1, mu_list, s_list, w_list)) def plot_interpolation(self): """This method plots the cumulative probability interpolation""" # ------- Plotting interpolation----------- # fig, ax = plt.subplots(1, 2, figsize = (11,4), dpi=100) # Data ax[0].plot(np.log(self.size_classes), self.proba_cumul_vol, linestyle='', marker='+') ax[1].plot(np.log(self.size_classes), self.proba_cumul_vol, linestyle='', marker='+') # Interpolation ax[0].plot(self.sizes_log_resampled, self.cumul_interpolated, color='red') ax[1].plot(self.sizes_log_resampled, self.cumul_interpolated, color='red') ax[0].set_xlabel("Log(a)") ax[0].set_ylabel("Cumulative fraction") ax[1].set_yscale('log') ax[1].set_ylim((1e-6, 2)) ax[1].set_xlabel("Log(a)") ax[1].set_ylabel("Cumulative fraction") ax[1].set_title('Double log scale') plt.show() def print_mode_data(self, n): """This method shows the parameters of the Gaussian Mixture Model of n modes""" if (n <= 0) or (n >10): print('Error: number of modes should be between 1 and 10') else: print('##### Number of modes: %d #####' %n) model = self.gaussian_mixture_model_list[n - 1] mode_center_txt = '' sigma_txt = '' weights_txt = '' sorted_indices = [] for i in range(n): sorted_indices.append(np.where(model.means_ == np.sort(model.means_, axis=0)[i])[0][0]) mode_center_txt = '%s %2.2f um' %(mode_center_txt, np.exp(model.means_[sorted_indices[i]])*1e6) sigma_txt = '%s %2.2f' %(sigma_txt, sqrt(model.covariances_[sorted_indices[i]][0][0])) weights_txt = '%s %2.1f %%' %(weights_txt, model.weights_[sorted_indices[i]]*100) print('Centers (radius):', mode_center_txt) print('Sigmas:', sigma_txt) print('Weigths:', weights_txt) def plot_modes(self, n): """This method plots the Gaussian Mixture Model of n components""" if (n <= 0) or (n >10): print('Error: number of modes should be between 1 and 10') else: # ---------------- Plotting ----------- # fig,(ax)=plt.subplots(2, 1, figsize = (9,6), dpi=100) # Data ax[0].plot(np.exp(self.sizes_log_resampled[0:-1])*1e6, self.resampled_proba_vol, label='Data - (proba vol.)', ls='--', linewidth=2, color='black') ax[1].plot(np.exp(self.sizes_log_resampled[0:-1])*1e6, self.resampled_proba_vol, label='Data - (proba vol.)', ls='--', linewidth=2, color='black') # Gaussian mixture model ax[0].plot(np.exp(self.sizes_log_resampled)*1e6, self.demodul_data_list[n - 1].model_total, color='red', label='Model (%d modes)' %(n)) ax[1].plot(np.exp(self.sizes_log_resampled)*1e6, self.demodul_data_list[n - 1].model_total, color='red', label='Model (%d modes)' %(n)) # Modes for i in range(n): sorted_indice = np.where(self.gaussian_mixture_model_list[n - 1].means_ == np.sort(self.gaussian_mixture_model_list[n - 1].means_, axis=0)[i])[0][0] ax[0].plot(np.exp(self.sizes_log_resampled)*1e6, self.demodul_data_list[n - 1].modes_list[sorted_indice]/np.sum(self.demodul_data_list[n - 1].model_total), label='Mode %d' %(i+1), linestyle=':') ax[1].plot(np.exp(self.sizes_log_resampled)*1e6, self.demodul_data_list[n - 1].modes_list[sorted_indice]/np.sum(self.demodul_data_list[n - 1].model_total), label='Mode %d' %(i+1), linestyle=':') # Axes properties ax[0].legend() ax[0].set_xlabel('Radius (um)', fontsize=13, weight='bold') ax[0].set_ylabel('Volume fraction', fontsize=13, weight='bold') ax[0].set_title('Gaussian Mixture Model -- %d modes' %(n), fontsize=14, weight='bold') ax[1].legend(loc=0) ax[1].set_xlabel('Radius (um)', fontsize=13, weight='bold') ax[1].set_ylabel('Volume fraction', fontsize=13, weight='bold') ax[1].set_xscale('log') fig.tight_layout() plt.show()