2024-05-28 15:07:29 +00:00
|
|
|
from pydub import AudioSegment
|
|
|
|
import numpy as np
|
|
|
|
from scipy.io.wavfile import write
|
|
|
|
import time
|
2024-06-01 04:40:38 +00:00
|
|
|
|
2024-05-28 15:07:29 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Function to convert audio samples from int16 to float32
|
|
|
|
def int16_to_float32(samples):
    """Rescale integer PCM samples to float32.

    Args:
        samples: NumPy array of integer samples. The divisor assumes the
            int16 value range.

    Returns:
        A float32 array with the same shape as ``samples``, where each
        value has been divided by 32768.
    """
    full_scale = 32768.0
    as_float = samples.astype(np.float32)
    return as_float / full_scale
|
|
|
|
|
|
|
|
# Function to convert audio samples from float32 to int16
|
|
|
|
def float32_to_int16(samples):
    """Convert float samples to int16 PCM.

    Samples are scaled by 32768 and then clipped to the int16 range before
    the cast. Without the clip, a full-scale sample of 1.0 scales to 32768,
    which does not fit in int16 and would come out as a negative value.

    Args:
        samples: NumPy array of float samples, nominally in [-1.0, 1.0].

    Returns:
        An int16 array with the same shape as ``samples``. Values outside
        the nominal range saturate at -32768 / 32767 instead of wrapping.
    """
    scaled = samples * 32768
    # Saturate first: an out-of-range float-to-int16 cast would wrap around.
    return np.clip(scaled, -32768, 32767).astype(np.int16)
|
|
|
|
|
|
|
|
# Function to calculate the average amplitude of float32 samples
|
|
|
|
def calculate_average_amplitude(float_samples, max_amp):
    """Return the mean absolute sample value, normalised by ``max_amp``.

    Args:
        float_samples: Array of samples; the mean is taken over every
            element regardless of shape.
        max_amp: Reference amplitude used as the divisor.

    Returns:
        ``mean(|float_samples|) / max_amp``, or ``0`` when ``max_amp`` is 0.
    """
    if max_amp == 0:
        # No reference level to normalise against.
        return 0

    mean_magnitude = np.abs(float_samples).mean()
    return mean_magnitude / max_amp
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def audiosegment_to_numpy_array(audio_segment):
    """Decode an audio segment's raw bytes into a (frames, channels) array.

    Args:
        audio_segment: Object exposing ``raw_data`` (bytes), ``channels``
            and ``sample_width`` (bytes per sample), such as a pydub
            ``AudioSegment``.

    Returns:
        A read-only integer array of shape ``(-1, channels)`` whose dtype
        matches the sample width (int8, int16 or int32).

    Raises:
        ValueError: If the sample width is not 1, 2 or 4 bytes.
    """
    # Signed integer dtype for each supported PCM sample width in bytes.
    # Every width other than 2 used to be decoded as int8, which misread
    # 4-byte audio as four times as many 8-bit samples.
    dtype_by_width = {1: np.int8, 2: np.int16, 4: np.int32}

    sample_width = audio_segment.sample_width
    try:
        dtype = dtype_by_width[sample_width]
    except KeyError:
        raise ValueError(
            "Unsupported sample width: {} bytes".format(sample_width)
        ) from None

    audio_data = np.frombuffer(audio_segment.raw_data, dtype=dtype)

    # Samples are interleaved per frame, so each row is one frame.
    return audio_data.reshape((-1, audio_segment.channels))
|
|
|
|
|
|
|
|
def numpy_array_to_audio_segment(samples, sample_width=2, frame_rate=44100, channels=2):
    """Build a pydub ``AudioSegment`` from a NumPy sample array.

    float32 input is scaled by 32767, clipped to the int16 range and cast
    to int16; ``sample_width`` is then forced to 2 regardless of the
    argument. Arrays of any other dtype are serialised as they are, so the
    caller must pass a ``sample_width`` that matches their item size.

    Args:
        samples: NumPy array of samples. A float32 array of shape (2, n) is
            treated as channel-major and transposed to (n, 2) so that the
            bytes are interleaved frame by frame.
        sample_width: Bytes per sample for non-float32 input.
        frame_rate: Sample rate passed to ``AudioSegment``.
        channels: Channel count passed to ``AudioSegment``.

    Returns:
        The ``AudioSegment`` wrapping the serialised samples.
    """
    if samples.dtype == np.float32:
        if samples.ndim == 2 and samples.shape[0] == 2:
            # Channel-major input: a transposed view gives (n, 2) frames
            # without a per-sample Python loop.
            samples = samples.T

        # Saturate before the cast so values outside [-1, 1] do not wrap
        # around in int16.
        samples = np.clip(samples * 32767, -32768, 32767).astype(np.int16)
        sample_width = 2

    # tobytes() emits C order, i.e. row by row, so (n, 2) data comes out
    # interleaved even when ``samples`` is a transposed view.
    raw_data = samples.tobytes()

    # Create an AudioSegment from the raw byte data.
    return AudioSegment(
        data=raw_data,
        sample_width=sample_width,
        frame_rate=frame_rate,
        channels=channels,
    )
|
|
|
|
|
|
|
|
def add_stereo(segments, seg_length, sample_rate):
    """Overlay several sample arrays into one and return it as float32.

    Each entry of ``segments`` is converted with
    ``numpy_array_to_audio_segment``; the later ones are overlaid onto the
    first with ``AudioSegment.overlay``. The mix is then decoded back to a
    NumPy array and rescaled with ``int16_to_float32``.

    Args:
        segments: Indexable, sliceable collection of sample arrays; must
            contain at least one entry.
        seg_length: Unused by this function.
        sample_rate: Frame rate used for every converted segment.

    Returns:
        The overlaid audio as returned by ``int16_to_float32``.
    """
    def _as_segment(sample_array):
        return numpy_array_to_audio_segment(sample_array, frame_rate=sample_rate, sample_width=4)

    mixed = _as_segment(segments[0])
    for layer in segments[1:]:
        mixed = mixed.overlay(_as_segment(layer))

    mixed_samples = audiosegment_to_numpy_array(mixed)
    return int16_to_float32(mixed_samples)
|
|
|
|
|
|
|
|
|
|
|
|
# Function to make num_parts copies of the audio, each with a different pan position, and calculate each copy's average amplitude
|
|
|
|
def split_stereo(segment, num_parts=100, max_amp=0, sr=44100):
    """Produce ``num_parts`` differently panned copies of ``segment``.

    The input is converted with ``numpy_array_to_audio_segment`` and panned
    with ``AudioSegment.pan`` at evenly spaced positions from -1 to 1.
    Each panned copy is decoded as int16 frames of two channels, rescaled
    with ``int16_to_float32`` and measured with
    ``calculate_average_amplitude``.

    Args:
        segment: Sample array accepted by ``numpy_array_to_audio_segment``.
        num_parts: Number of pan positions. With exactly one part the copy
            is centred (pan 0.0), since a single position cannot span the
            range. Zero or fewer parts produce empty results.
        max_amp: Reference amplitude for the averages; with the default of
            0 every reported average is 0.
        sr: Frame rate of ``segment``.

    Returns:
        A tuple ``(avg_amplitudes, audios)``: a list with one average per
        part, and the float32 copies stacked by ``np.array``.
    """
    audio = numpy_array_to_audio_segment(segment, frame_rate=sr)

    if num_parts == 1:
        # The step formula below would divide by zero for a single part.
        pan_values = [0.0]
    else:
        # num_parts positions span -1..1 in num_parts - 1 equal steps.
        pan_step = 2 / (num_parts - 1)
        pan_values = [-1 + i * pan_step for i in range(num_parts)]

    avg_amplitudes = []
    audios = []
    for pan_value in pan_values:
        # Apply panning to the audio.
        panned_audio = audio.pan(pan_value)

        # Decode the raw bytes as int16 and group them into 2-channel frames.
        samples = np.frombuffer(panned_audio.raw_data, dtype=np.int16)
        samples = samples.reshape((-1, 2))

        float_samples = int16_to_float32(samples)

        # Record the average amplitude for the current part.
        avg_amplitudes.append(calculate_average_amplitude(float_samples, max_amp))
        audios.append(float_samples)

    return avg_amplitudes, np.array(audios)
|
|
|
|
|