from pydub import AudioSegment
import numpy as np
from scipy.io.wavfile import write
import time


def int16_to_float32(samples):
    """Convert int16 PCM samples to float32 in [-1.0, 1.0)."""
    return samples.astype(np.float32) / 32768.0


def float32_to_int16(samples):
    """Convert float32 samples (nominally in [-1.0, 1.0]) to int16 PCM.

    Values are clipped to the representable int16 range before the cast.
    The previous implementation multiplied by 32768 and cast directly,
    which wrapped +1.0 around to -32768 (int16 overflow).
    """
    return np.clip(samples * 32768, -32768, 32767).astype(np.int16)


def calculate_average_amplitude(float_samples, max_amp):
    """Return the mean absolute amplitude of *float_samples* normalized by *max_amp*.

    Returns 0 when max_amp == 0 to avoid division by zero (matching the
    original behavior for the default max_amp).
    """
    if max_amp == 0:
        return 0
    # Mean of |x| over all channels/frames, scaled to the reference amplitude.
    return np.mean(np.abs(float_samples)) / max_amp


def audiosegment_to_numpy_array(audio_segment):
    """Convert a pydub AudioSegment to a (num_frames, num_channels) int array."""
    raw_data = audio_segment.raw_data
    num_channels = audio_segment.channels
    sample_width = audio_segment.sample_width
    # 2-byte samples are int16; anything else is assumed to be 1-byte int8.
    dtype = np.int16 if sample_width == 2 else np.int8
    audio_data = np.frombuffer(raw_data, dtype=dtype)
    # Interleaved frames -> one row per frame, one column per channel.
    return audio_data.reshape((-1, num_channels))


def numpy_array_to_audio_segment(samples, sample_width=2, frame_rate=44100, channels=2):
    """Build a pydub AudioSegment from a numpy sample array.

    float32 input is assumed to lie in [-1.0, 1.0] and is rescaled to int16
    (sample_width is forced to 2 in that case).  A (2, n) float32 array is
    treated as planar stereo and interleaved to (n, 2) first.
    """
    if samples.ndim == 2 and samples.shape[0] == 2 and samples.dtype == np.float32:
        # Planar (2, n) -> interleaved (n, 2).  A vectorized transpose
        # replaces the original O(n) Python-level zip/list-comprehension.
        interleaved = np.ascontiguousarray(samples.T)
        samples = np.clip(interleaved * 32767, -32768, 32767).astype(np.int16)
        sample_width = 2
    elif samples.dtype == np.float32:
        # Clip guards against out-of-range floats overflowing the cast.
        samples = np.clip(samples * 32767, -32768, 32767).astype(np.int16)
        sample_width = 2
    raw_data = samples.tobytes()
    return AudioSegment(
        data=raw_data,
        sample_width=sample_width,
        frame_rate=frame_rate,
        channels=channels,
    )


def add_stereo(segments, seg_length, sample_rate):
    """Mix a list of stereo segments into a single track by overlaying them.

    Parameters
    ----------
    segments : sequence of arrays accepted by numpy_array_to_audio_segment
        (presumably (2, n) float32 in [-1.0, 1.0] — TODO confirm with callers).
    seg_length : unused; kept for backward compatibility of the signature.
    sample_rate : frame rate in Hz.

    Returns the mix as a float32 array of shape (num_frames, 2).
    """
    # NOTE: the original passed sample_width=4 here, but the helper always
    # overrides it to 2 for float32 input; for int16 input a width of 4 would
    # misinterpret the buffer.  Using the default (2) is correct in both cases.
    final_audio = numpy_array_to_audio_segment(segments[0], frame_rate=sample_rate)
    for segment in segments[1:]:
        overlay = numpy_array_to_audio_segment(segment, frame_rate=sample_rate)
        final_audio = final_audio.overlay(overlay)
    return int16_to_float32(audiosegment_to_numpy_array(final_audio))


# Function to split the audio into num_parts parts with different panning
# and calculate each part's average amplitude.
def split_stereo(segment, num_parts=100, max_amp=0, sr=44100):
    """Pan *segment* across *num_parts* positions from hard left to hard right.

    Parameters
    ----------
    segment : array accepted by numpy_array_to_audio_segment.
    num_parts : number of evenly spaced pan positions in [-1, 1].
    max_amp : reference amplitude for normalization (0 -> amplitudes are all 0).
    sr : frame rate in Hz.

    Returns
    -------
    (avg_amplitudes, audios) : list of normalized mean amplitudes, and a
        float32 array of shape (num_parts, num_frames, 2) with the panned audio.
    """
    audio = numpy_array_to_audio_segment(segment, frame_rate=sr)
    # Pan from -1 (left) to +1 (right) in num_parts evenly spaced steps.
    # Guard num_parts == 1 to avoid ZeroDivisionError (single centered... no:
    # a single part stays hard left at -1, consistent with the i = 0 step).
    pan_step = 2 / (num_parts - 1) if num_parts > 1 else 0
    avg_amplitudes = []
    audios = []
    for i in range(num_parts):
        pan_value = -1 + i * pan_step
        panned_audio = audio.pan(pan_value)
        # Raw bytes are interleaved int16 stereo -> (num_frames, 2).
        samples = np.frombuffer(panned_audio.raw_data, dtype=np.int16)
        float_samples = int16_to_float32(samples.reshape((-1, 2)))
        avg_amplitudes.append(calculate_average_amplitude(float_samples, max_amp))
        audios.append(float_samples)
    return avg_amplitudes, np.array(audios)