acoused/Model/acoustic_data_loader_UBSedi...

# ============================================================================== #
    # acoustic_data_loder_UBSediFlow.py - AcouSed                            #
    # Copyright (C) 2024  INRAE                                              #
    #                                                                        #
    # This program is free software: you can redistribute it and/or modify   #
    # it under the terms of the GNU General Public License as published by   #
    # the Free Software Foundation, either version 3 of the License, or      #
    # (at your option) any later version.                                    #
    #                                                                        #
    # This program is distributed in the hope that it will be useful,        #
    # but WITHOUT ANY WARRANTY; without even the implied warranty of         #
    # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
    # GNU General Public License for more details.                           #
    #                                                                        #
    # You should have received a copy of the GNU General Public License      #
    # along with this program.  If not, see <https://www.gnu.org/licenses/>. #

    #                                   by Brahim MOUDJED and Bjarne VINCENT #
# ============================================================================== #

# -*- coding: utf-8 -*-


import numpy as np

from Model.udt_extract.raw_extract import raw_extract


class AcousticDataLoaderUBSediFlow:

    def __init__(self, path_BS_raw_data: str):

        self.path_BS_raw_data = path_BS_raw_data

        # --- Extract Backscatter acoustic raw data with class ---

        # Extraction function:

        device_name, time_begin, time_end, param_us_dicts, data_us_dicts, data_dicts, settings_dict \
            = raw_extract(self.path_BS_raw_data)

        # # --- Date and Hour of measurements read on udt data file ---

        self._date = time_begin.date() #Date when the data has been recorded
        self._hour = time_begin.time() #Starting time

        self._freq = np.array([[]]) #Array of the frequencies (Hz) used for each channel

        self._nb_profiles = [] #Number of profiles (i.e., time stamps) for each configuration
        self._nb_profiles_per_sec = []
        self._nb_cells = [] #Number of cells (i.e., vertical coordinates) for each configuration
        self._cell_size = [] #Cell size (m) for each configuration
        self._pulse_length = [] #Duration (s) of a pulse emitted by the instrument for each configuration
        self._nb_pings_per_sec = [] #Number of pings (in Ubertone's user manual, that quantity if referred to as the Pulse Repetition Frequency (PRF))
        self._nb_pings_averaged_per_profile = []
        self._kt = [] #Calibration constant for each configuration (note: 'kt' is a function of the emitted waves' frequency)
        self._gain_rx = [] #Gain at reception (dB)
        self._gain_tx = [] #Gain at emission (dB/m)


        r_list = [] #List of 1D NumPy arrays storing the cells' vertical coordinates (m) for each frequency
        time_list = [] #List of 1D NumPy arrays storing the time stamps (s) of the signals recorded for each frequency
        bs_list = [] #List of 2D NumPy arrays storing the backscatter (BS) signals recorded for each frequency

        time_len = np.array([]) #1D NumPy array storing the number of time stamps (i.e., vertical profiles) for each frequency
        r_len = np.array([]) #1D NumPy array storing the number of vertical coordinates for each frequency


        # Loop over the number of UB SediFlow configurations:

        for config in param_us_dicts.keys():

            # Loop over the number of channels for the current configuration
            # (reminder: a channel refers to a transducer hence frequency):

            for channel in param_us_dicts[config].keys():

                # --- Extract the frequencies, k_t constant, cell size, etc. ---

                r0 = param_us_dicts[config][channel]['r_cell1']
                freq = param_us_dicts[config][channel]['f0']
                nb_cells = param_us_dicts[config][channel]['n_cell']
                cell_size = param_us_dicts[config][channel]['r_dcell']
                pulse_length = 0
                nb_pings_per_sec = param_us_dicts[config][channel]['prf']
                nb_pings_averaged_per_profile = param_us_dicts[config][channel]['n_p']
                kt = 0
                gain_rx = param_us_dicts[config][channel]['a0']
                gain_tx = param_us_dicts[config][channel]['a1']


                self._freq = np.append(self._freq, freq)

                self._nb_cells.append(nb_cells)
                self._cell_size.append(cell_size)
                self._pulse_length.append(pulse_length)
                self._nb_pings_per_sec.append(nb_pings_per_sec)
                self._nb_pings_averaged_per_profile.append(nb_pings_averaged_per_profile)
                self._kt.append(kt)
                self._gain_rx.append(gain_rx)
                self._gain_tx.append(gain_tx)


                # --- Determine the vertical coordinate (m) of the measurement points at each frequency ---

                depth = np.linspace( r0, r0 + (nb_cells - 1) * cell_size, nb_cells )
                r_list.append(depth)

                r_len = np.append(r_len, depth.shape[0]) #Store the number of cells for the current frequency


                # --- Extract the time stamps of the backscatter (BS) signals for the current frequency ---

                time = [(t - data_us_dicts[config][channel]['echo_avg_profile']['time'][0]).total_seconds()
                              for t in data_us_dicts[config][channel]['echo_avg_profile']['time']]
                time = np.array(time)
                time_list.append(time)

                time_len = np.append( time_len, time.shape[0] )

                self._nb_profiles.append(time.shape[0])
                self._nb_profiles_per_sec.append( param_us_dicts[config][channel]['n_avg'] )


                # --- Extract the BS signal for the current frequency ---

                # Loop over all time stamps:

                for i in range( int(time_len[-1]) ):

                    # Extract the BS profile at time stamp 'i':

                    currProfile = np.array( [ data_us_dicts[config][channel]['echo_avg_profile']['data'][i] ] )


                    # Store the BS profile as a new line of 'bs_data':

                    if i == 0:

                        bs_data = currProfile

                    else:

                        bs_data = np.append( bs_data, currProfile, axis=0 )


                bs_list.append( bs_data.transpose() ) #Now: bs_list[k].shape[0] = nb_cells, and bs_list[k].shape[1] = time_len[k]


        self._freq_text = np.array([str(f) + " MHz" for f in [np.round(f*1e-6, 2) for f in self._freq]])


        '''
        Now, whe shall reshape the data (BS signal arrays, time stamps arrays and vertical coordinates arrays)
        so that the number of time stamps is the same for all frequencies, as well as the number of cells (i.e.
        the number of vertical coordinates). The reason for doing this is that AcouSed stores the data in 3D
        NumPy arrays (dim 0 --> frequencies, dim 1 --> depth and dim 2 --> time).

        The idea is to fill the BS arrays with NaNs so that their size is the same for all frequencies. When
        required, the NaNs are added AT THE END of the arrays. The vertical coordinates and time stamps arrays
        cannot be filled with NaNs, since the 2D plotting routines of AcouSed require finite-valued time stamps
        and vertical coordinates. We thus add fictitious time stamps and vertical coordinates, for which the
        BS signal is NaN.
        '''


        # First, find the maximum number of time stamps (i.e. vertical profiles) and vertical coordinates
        # among the recorded signals:

        max_nb_profiles = int( np.max(time_len) )
        max_nb_coords = int( np.max(r_len) )


        # Second, loop over all frequencies and fill the corresponding arrays with NaNs (if needed):

        self._BS_raw_data = np.zeros( ( len(bs_list), max_nb_coords, max_nb_profiles ) )
        self._r = np.zeros( ( len(bs_list), max_nb_coords ) )
        self._time = np.zeros( ( len(bs_list), max_nb_profiles ) )

        for k in range( len(bs_list) ):

            # Fill the vertical coordinates with NaNs:

            cell_size = self._cell_size[k]
            time_step = time_list[k][1] - time_list[k][0]

            while bs_list[k].shape[0] < max_nb_coords:

                bs_list[k] = np.append( bs_list[k], np.nan * np.ones( (1, bs_data[k].shape[1]) ), axis=0 )
                r_list[k] = np.append( r_list[k], r_list[k][-1] + cell_size )

                r_len[k] += 1


            # Fill the time stamps with NaNs:

            while bs_list[k].shape[1] < max_nb_profiles:

                bs_list[k] = np.append( bs_list[k], np.nan * np.ones( ( bs_data[k].shape[0], 1 ) ), axis=1 )
                time_list[k] = np.append( time_list[k], time_list[k][-1] + time_step )

                time_len[k] += 1
                self._nb_profiles[k] += 1


            # Finally, store the 2D arrays for each frequency in 3D NumPy arrays:

            self._BS_raw_data[k,:,:] = bs_list[k]
            self._time[k,:] = time_list[k]
            self._r[k,:] = r_list[k]


    def reshape_BS_raw_data(self):

        '''
        Form and return a table from the raw BS (backscatter) data loaded into AcouSed. That table
        is the one to be displayed in the "Table of values" section of the "Acoustic data" tab.

        The table is a L*K NumPy array, where 'K' stands for the number of frequencies in the
        loaded dataset, and L = R*T with 'R' the number of vertical coordinates and 'T' the number
        of time stamps.

        Finally, the BS array is flattened (i.e., converted to a 1D array) for each frequency so that
        the index spanning the vertical coordinates runs the fastest i.e.

        Line 0  : BS data at (t, r) = (t_0, r_0)
        Line 1  : BS data at (t, r) = (t_0, r_1)
        Line 2  : BS data at (t, r) = (t_0, r_2)
        ...
        Line R  : BS data at (t, r) = (t_0, r_R)
        Line R+1: BS data at (t, r) = (t_1, r_0)
        Line R+2: BS data at (t, r) = (t_1, r_1)
        ...

        '''


        # Create and initialise the returned 2D array. Note:
        # R = self._r.shape[1] = self._BS_raw_data.shape[1]
        # T = self._time.shape[1] = self._BS_raw_data.shape[2]
        # K = len(self._freq)

        BS_raw_cross_section = np.zeros( ( self._r.shape[1] * self._time.shape[1], len(self._freq) ) )


        # Fill the k-th column of 'BS_raw_cross_section' with the BS data recorded with the k-th frequency:

        for k in range( len(self._freq) ):

            BS_raw_cross_section[:, k] = np.reshape( self._BS_raw_data[k,:,:],
                                                     self._BS_raw_data.shape[1] * self._BS_raw_data.shape[2],
                                                     order = "F" )

        return BS_raw_cross_section


    def reshape_r(self):
        r = np.zeros((self._r.shape[1]*self._time.shape[1], len(self._freq)))
        for i, _ in enumerate(self._freq):
            for j in range(self._time.shape[1]):

                r[j*self._r.shape[1]:(j+1)*self._r.shape[1], i] = self._r[i, :]

        return r

    def compute_r_2D(self):
        r2D = np.zeros((self._freq.shape[0], self._r.shape[1], self._time.shape[1]))
        for f, _ in enumerate(self._freq):
            r2D[f, :, :] = np.repeat(np.transpose(self._r[0, :])[:, np.newaxis], self._time.shape[1], axis=1)
        return r2D

    def reshape_t(self):
        t = np.zeros((self._r.shape[1]*self._time.shape[1], len(self._freq)))
        for i, _ in enumerate(self._freq):
            t[:, i] = np.repeat(self._time[i, :], self._r.shape[1])
        return t

    def reshape_t_snr(self):
        t = np.zeros((self._r.shape[1]*self._time_snr.shape[1], len(self._freq)))
        for i, _ in enumerate(self._freq):
            t[:, i] = np.repeat(self._time_snr[i, :], self._r.shape[1])
        return t


# if __name__ == "__main__":
#     AcousticDataLoaderUBSediFlow(path_BS_raw_data0 + filename0)