From 6ea28d4dd867d1e6e74e2e2738043137f17262e0 Mon Sep 17 00:00:00 2001
From: Bjarne Vincent <bjarne.vincent@inrae.fr>
Date: Mon, 20 Oct 2025 16:28:30 +0200
Subject: [PATCH] Fixed the problem arising when loading UB-SediFlow data in
 AcouSed. The computations of X, VBI, etc. and the acoustic inversion still
 need to be modifed to account for the different spatial and temporal grids
 though.

---
 Model/acoustic_data_loader_UBSediFlow.py | 284 ++++++++++++-----------
 1 file changed, 144 insertions(+), 140 deletions(-)

diff --git a/Model/acoustic_data_loader_UBSediFlow.py b/Model/acoustic_data_loader_UBSediFlow.py
index 6747307..5bbb5ac 100644
--- a/Model/acoustic_data_loader_UBSediFlow.py
+++ b/Model/acoustic_data_loader_UBSediFlow.py
@@ -1,5 +1,5 @@
 # ============================================================================== #
-    # acoustic_data_loder_UBSediFlow.py - AcouSed                                   #
+    # acoustic_data_loder_UBSediFlow.py - AcouSed                            #
     # Copyright (C) 2024  INRAE                                              #
     #                                                                        #
     # This program is free software: you can redistribute it and/or modify   #
@@ -15,7 +15,7 @@
     # You should have received a copy of the GNU General Public License      #
     # along with this program.  If not, see <https://www.gnu.org/licenses/>. #
 
-    #                                                      by Brahim MOUDJED #
+    #                                   by Brahim MOUDJED and Bjarne VINCENT #
 # ============================================================================== #
 
 # -*- coding: utf-8 -*-
@@ -32,7 +32,7 @@ class AcousticDataLoaderUBSediFlow:
 
         self.path_BS_raw_data = path_BS_raw_data
 
-        # --- Extract Backscatter acoustic raw data with  class ---
+        # --- Extract Backscatter acoustic raw data with class ---
 
         # Extraction function:
 
@@ -41,177 +41,178 @@ class AcousticDataLoaderUBSediFlow:
 
         # # --- Date and Hour of measurements read on udt data file ---
 
-        self._date = time_begin.date()
-        self._hour = time_begin.time()
+        self._date = time_begin.date() #Date when the data has been recorded
+        self._hour = time_begin.time() #Starting time
 
-        self._freq = np.array([[]])
+        self._freq = np.array([[]]) #Array of the frequencies (Hz) used for each channel
 
-        self._nb_profiles = []
+        self._nb_profiles = [] #Number of profiles (i.e., time stamps) for each configuration
         self._nb_profiles_per_sec = []
-        self._nb_cells = []
-        self._cell_size = []
-        self._pulse_length = []
-        self._nb_pings_per_sec = []
+        self._nb_cells = [] #Number of cells (i.e., vertical coordinates) for each configuration
+        self._cell_size = [] #Cell size (m) for each configuration
+        self._pulse_length = [] #Duration (s) of a pulse emitted by the instrument for each configuration
+        self._nb_pings_per_sec = [] #Number of pings (in Ubertone's user manual, that quantity if referred to as the Pulse Repetition Frequency (PRF))
         self._nb_pings_averaged_per_profile = []
-        self._kt = []
-        self._gain_rx = []
-        self._gain_tx = []
+        self._kt = [] #Calibration constant for each configuration (note: 'kt' is a function of the emitted waves' frequency)
+        self._gain_rx = [] #Gain at reception (dB)
+        self._gain_tx = [] #Gain at emission (dB/m)
+        
 
-        self._r = np.array([[]])
-        self._time = np.array([[]])
-        self._time_snr = np.array([[]])
-        self._BS_raw_data = np.array([[[]]])
-        time_len = []
+        r_list = [] #List of 1D NumPy arrays storing the cells' vertical coordinates (m) for each frequency
+        time_list = [] #List of 1D NumPy arrays storing the time stamps (s) of the signals recorded for each frequency
+        bs_list = [] #List of 2D NumPy arrays storing the backscatter (BS) signals recorded for each frequency 
+        
+        time_len = np.array([]) #1D NumPy array storing the number of time stamps (i.e., vertical profiles) for each frequency 
+        r_len = np.array([]) #1D NumPy array storing the number of vertical coordinates for each frequency
 
+
+        # Loop over the number of UB SediFlow configurations:
+        
         for config in param_us_dicts.keys():
 
+            # Loop over the number of channels for the current configuration
+            # (reminder: a channel refers to a transducer hence frequency):
+            
             for channel in param_us_dicts[config].keys():
 
-                # --- Frequencies ---
-                self._freq = np.append(self._freq, param_us_dicts[config][channel]['f0'])
+                # --- Extract the frequencies, k_t constant, cell size, etc. ---
 
-                self._nb_cells = np.append(self._nb_cells, param_us_dicts[config][channel]['n_cell'])
-                self._cell_size = np.append(self._cell_size, param_us_dicts[config][channel]['r_dcell'])
-                self._pulse_length = np.append(self._pulse_length, 0)
-                self._nb_pings_per_sec = np.append(self._nb_pings_per_sec, param_us_dicts[config][channel]['prf'])
-                self._nb_pings_averaged_per_profile = np.append(self._nb_pings_averaged_per_profile, param_us_dicts[config][channel]['n_p'])
-                self._kt = np.append(self._kt, 0)
-                self._gain_rx = np.append(self._gain_rx, param_us_dicts[config][channel]['a0'])
-                self._gain_tx = np.append(self._gain_tx, param_us_dicts[config][channel]['a1'])
+                r0 = param_us_dicts[config][channel]['r_cell1']
+                freq = param_us_dicts[config][channel]['f0']
+                nb_cells = param_us_dicts[config][channel]['n_cell']
+                cell_size = param_us_dicts[config][channel]['r_dcell']
+                pulse_length = 0
+                nb_pings_per_sec = param_us_dicts[config][channel]['prf']
+                nb_pings_averaged_per_profile = param_us_dicts[config][channel]['n_p']
+                kt = 0
+                gain_rx = param_us_dicts[config][channel]['a0']
+                gain_tx = param_us_dicts[config][channel]['a1']
+                
+                
+                self._freq = np.append(self._freq, freq)
 
-                # --- Depth for each frequencies ---
-                depth = [param_us_dicts[config][channel]['r_dcell'] * i
-                         for i in list(range(param_us_dicts[config][channel]['n_cell']))]
+                self._nb_cells.append(nb_cells)
+                self._cell_size.append(cell_size)
+                self._pulse_length.append(pulse_length)
+                self._nb_pings_per_sec.append(nb_pings_per_sec)
+                self._nb_pings_averaged_per_profile.append(nb_pings_averaged_per_profile)
+                self._kt.append(kt)
+                self._gain_rx.append(gain_rx)
+                self._gain_tx.append(gain_tx)
 
-                if self._r.shape[1] == 0:
-                    self._r = np.array([depth])
-                else:
-                    if len(depth) == self._r.shape[1]:
-                        self._r = np.append(self._r, np.array([depth]), axis=0)
-                    elif len(depth) < self._r.shape[1]:
-                        self._r = self._r[:, :len(depth)]
-                        self._r = np.append(self._r, np.array([depth]), axis=0)
-                    elif len(depth) > self._r.shape[1]:
-                        self._r = np.append(self._r, np.array([depth[:self._r.shape[1]]]), axis=0)
 
-                # --- BS Time for each frequencies ---
-                time = [[(t - data_us_dicts[config][channel]['echo_avg_profile']['time'][0]).total_seconds()
-                              for t in data_us_dicts[config][channel]['echo_avg_profile']['time']]]
-                time_len = np.append(time_len, len(time[0]))
+                # --- Determine the vertical coordinate (m) of the measurement points at each frequency ---
 
-                if len(time_len) == 1:
-                    self._time = np.array(time)
-                elif self._time.shape[1] == len(time[0]):
-                    self._time = np.append(self._time, time, axis=0)
-                elif self._time.shape[1] > len(time[0]):
-                    self._time = np.delete(self._time,
-                                           [int(np.min(time_len)) + int(i) - 1 for i in range(1, int(np.max(time_len))-int(np.min(time_len))+1)],
-                                           axis=1)
-                    self._time = np.append(self._time, time, axis=0)
-                elif self._time.shape[1] < len(time[0]):
-                    time = time[:int(np.max(time_len)) - (int(np.max(time_len)) - int(np.min(time_len)))]
-                    self._time = np.append(self._time, time, axis=0)
+                depth = np.linspace( r0, r0 + (nb_cells - 1) * cell_size, nb_cells )
+                r_list.append(depth)
 
-                self._nb_profiles = np.append(self._nb_profiles, self._time.shape[1])
-                self._nb_profiles_per_sec = np.append(self._nb_profiles_per_sec,
-                                                      param_us_dicts[config][channel]['n_avg'])
+                r_len = np.append(r_len, depth.shape[0]) #Store the number of cells for the current frequency
+                                  
 
-                # --- US Backscatter raw signal ---
-                BS_data = np.array([[]])
+                # --- Extract the time stamps of the backscatter (BS) signals for the current frequency ---
 
-                if config == 1:
+                time = [(t - data_us_dicts[config][channel]['echo_avg_profile']['time'][0]).total_seconds()
+                              for t in data_us_dicts[config][channel]['echo_avg_profile']['time']]
+                time = np.array(time)
+                time_list.append(time)
 
-                    BS_data = np.array([data_us_dicts[config][channel]['echo_avg_profile']['data'][0]])
+                time_len = np.append( time_len, time.shape[0] )
+                
+                self._nb_profiles.append(time.shape[0])
+                self._nb_profiles_per_sec.append( param_us_dicts[config][channel]['n_avg'] )
 
-                    for i in range(self._time.shape[1]):
-                        BS_data = np.append(BS_data,
-                                            np.array(
-                                                [data_us_dicts[config][channel]['echo_avg_profile']['data'][i]]),
-                                            axis=0)
+                
 
-                    self._BS_raw_data = np.array([BS_data[:self._time.shape[1], :].transpose()])
+                # --- Extract the BS signal for the current frequency ---                
 
-                else:
+                # Loop over all time stamps:
+                
+                for i in range( int(time_len[-1]) ):
 
-                    BS_data = np.array([data_us_dicts[config][channel]['echo_avg_profile']['data'][0]])
+                    # Extract the BS profile at time stamp 'i':
+                    
+                    currProfile = np.array( [ data_us_dicts[config][channel]['echo_avg_profile']['data'][i] ] )
 
-                    for j in range(self._time.shape[1]):
-                        BS_data = np.append(BS_data,
-                                            np.array(
-                                                [data_us_dicts[config][channel]['echo_avg_profile']['data'][j]]),
-                                            axis=0)
-                    BS_data = np.array([BS_data.transpose()])
 
-                    # 1- time shape > BS data shape
-                    #    <=> data recorded with the frequency are longer than data recorded with the other lower frequencies
-                    if (BS_data.shape[2] > self._BS_raw_data.shape[2]):
+                    # Store the BS profile as a new line of 'bs_data':
 
-                        if (BS_data.shape[1] > self._BS_raw_data.shape[1]):
-                                #                         print(f"BS_data shape[0] = {BS_data.shape[0]}")
-                                #                         print(f"BS_raw_data shape[2] = {BS_raw_data.shape[2]}")
-                            self._BS_raw_data = np.append(self._BS_raw_data,
-                                                    BS_data[:, :self._BS_raw_data.shape[1], :self._BS_raw_data.shape[2]],
-                                                    axis=0)
+                    if i == 0:
 
-                        elif (BS_data.shape[1] < self._BS_raw_data.shape[1]):
-                            self._BS_raw_data = np.append(self._BS_raw_data[:, :BS_data.shape[1], :],
-                                                    BS_data[:, :, :self._BS_raw_data.shape[2]],
-                                                    axis=0)
+                        bs_data = currProfile
 
-                        else:
-                            self._BS_raw_data = np.append(self._BS_raw_data,
-                                                    BS_data[:, :, :self._BS_raw_data.shape[2]],
-                                                    axis=0)
-
-                    # 2- time shape < BS data shape
-                    #    <=> data recorded with the frequency are shorter than data recorded with the other lower frequencies
-                    elif BS_data.shape[2] < self._BS_raw_data.shape[2]:
-
-                        if (BS_data.shape[1] > self._BS_raw_data.shape[1]):
-                            self._BS_raw_data = np.append(self._BS_raw_data[:, :, BS_data.shape[2]],
-                                                    BS_data[:, :self._BS_raw_data.shape[1], :],
-                                                    axis=0)
-
-                        elif (BS_data.shape[1] < self._BS_raw_data.shape[1]):
-                            self._BS_raw_data = np.append(self._BS_raw_data[:, :BS_data.shape[1], BS_data.shape[2]],
-                                                    BS_data,
-                                                    axis=0)
-
-                        else:
-                            self._BS_raw_data = np.append(self._BS_raw_data[:, :, BS_data.shape[0]],
-                                                    BS_data,
-                                                    axis=0)
-
-                    # 3- time shape = BS data shape
-                    #    <=> data recorded with the frequency have the same duration than data recorded with the other lower frequency
                     else:
 
-                        if (BS_data.shape[1] > self._BS_raw_data.shape[1]):
+                        bs_data = np.append( bs_data, currProfile, axis=0 )
+                    
 
-                            self._BS_raw_data = np.append(self._BS_raw_data,
-                                                    BS_data[:, :self._BS_raw_data.shape[1], :], axis=0)
 
-                        elif (BS_data.shape[1] < self._BS_raw_data.shape[1]):
+                bs_list.append( bs_data.transpose() ) #Now: bs_list[k].shape[0] = nb_cells, and bs_list[k].shape[1] = time_len[k]
+                
 
-                            self._BS_raw_data = np.append(self._BS_raw_data[:, :BS_data.shape[1], :],
-                                                    BS_data, axis=0)
-
-                        else:
-
-                            self._BS_raw_data = np.append(self._BS_raw_data,
-                                                        BS_data, axis=0)
-
-        if self._time.shape[1] > self._BS_raw_data.shape[2]:
-            self._time = self._time[:, :self._BS_raw_data.shape[2]]
-        elif self._time.shape[1] < self._BS_raw_data.shape[2]:
-            self._BS_raw_data = self._BS_raw_data[:, :, :self._time.shape[1]]
-        else:
-            self._time = self._time
-            self._BS_raw_data = self._BS_raw_data
-
-        self._time = self._time[:, :self._BS_raw_data.shape[2]]
 
         self._freq_text = np.array([str(f) + " MHz" for f in [np.round(f*1e-6, 2) for f in self._freq]])
+        
+                
+        '''
+        Now, whe shall reshape the data (BS signal arrays, time stamps arrays and vertical coordinates arrays)
+        so that the number of time stamps is the same for all frequencies, as well as the number of cells (i.e.
+        the number of vertical coordinates). The reason for doing this is that AcouSed stores the data in 3D
+        NumPy arrays (dim 0 --> frequencies, dim 1 --> depth and dim 2 --> time).
+
+        The idea is to fill the BS arrays with NaNs so that their size is the same for all frequencies. When
+        required, the NaNs are added AT THE END of the arrays. The vertical coordinates and time stamps arrays
+        cannot be filled with NaNs, since the 2D plotting routines of AcouSed require finite-valued time stamps
+        and vertical coordinates. We thus add fictitious time stamps and vertical coordinates, for which the
+        BS signal is NaN.
+        '''
+        
+
+        # First, find the maximum number of time stamps (i.e. vertical profiles) and vertical coordinates
+        # among the recorded signals:
+        
+        max_nb_profiles = int( np.max(time_len) )
+        max_nb_coords = int( np.max(r_len) )
+
+
+        # Second, loop over all frequencies and fill the corresponding arrays with NaNs (if needed):
+
+        self._BS_raw_data = np.zeros( ( len(bs_list), max_nb_coords, max_nb_profiles ) )
+        self._r = np.zeros( ( len(bs_list), max_nb_coords ) )
+        self._time = np.zeros( ( len(bs_list), max_nb_profiles ) )
+
+        for k in range( len(bs_list) ):
+            
+            # Fill the vertical coordinates with NaNs:
+
+            cell_size = self._cell_size[k]
+            time_step = time_list[k][1] - time_list[k][0]
+            
+            while bs_list[k].shape[0] < max_nb_coords:
+
+                bs_list[k] = np.append( bs_list[k], np.nan * np.ones( (1, bs_data[k].shape[1]) ), axis=0 )
+                r_list[k] = np.append( r_list[k], r_list[k][-1] + cell_size )
+
+                r_len[k] += 1
+
+
+            # Fill the time stamps with NaNs:
+
+            while bs_list[k].shape[1] < max_nb_profiles:
+
+                bs_list[k] = np.append( bs_list[k], np.nan * np.ones( ( bs_data[k].shape[0], 1 ) ), axis=1 )
+                time_list[k] = np.append( time_list[k], time_list[k][-1] + time_step )
+
+                time_len[k] += 1
+                self._nb_profiles[k] += 1
+
+
+            # Finally, form the 2D arrays for each frequency and store them in 3D NumPy arrays:
+
+            self._BS_raw_data[k,:,:] = bs_list[k]
+            self._time[k,:] = time_list[k]
+            self._r[k,:] = r_list[k]
+
+        
 
     def reshape_BS_raw_data(self):
         BS_raw_cross_section = np.reshape(self._BS_raw_data,
@@ -222,7 +223,10 @@ class AcousticDataLoaderUBSediFlow:
     def reshape_r(self):
         r = np.zeros((self._r.shape[1]*self._time.shape[1], len(self._freq)))
         for i, _ in enumerate(self._freq):
-            r[:, i] = np.repeat(self._r[i, :], self._time.shape[1])
+            for j in range(self._time.shape[1]):
+
+                r[j*self._r.shape[1]:(j+1)*self._r.shape[1], i] = self._r[i, :]
+                
         return r
 
     def compute_r_2D(self):