smodifier/divideStereo.py
2024-06-01 11:11:58 +05:30

110 lines
3.7 KiB
Python

from pydub import AudioSegment
import numpy as np
from scipy.io.wavfile import write
import time
def int16_to_float32(samples):
    """Convert signed 16-bit PCM samples to float32 in [-1.0, 1.0)."""
    # 1/32768 is a power of two, so the scale factor is exact in float32.
    scale = np.float32(1.0 / 32768.0)
    return samples.astype(np.float32) * scale
def float32_to_int16(samples):
    """Convert float32 samples in [-1.0, 1.0] to signed 16-bit PCM.

    The result is clipped to the representable int16 range: a
    full-scale +1.0 sample maps to 32767 instead of wrapping around to
    -32768 (the original unclipped ``* 32768`` overflowed for any
    sample >= 1.0).
    """
    scaled = np.clip(samples * 32768.0, -32768, 32767)
    return scaled.astype(np.int16)
def calculate_average_amplitude(float_samples, max_amp):
    """Return the mean absolute amplitude of `float_samples`, normalized
    by `max_amp`.

    A `max_amp` of 0 yields 0 rather than dividing by zero.
    """
    if max_amp == 0:
        return 0
    # Mean of |samples| gives the average amplitude of the signal.
    return np.mean(np.abs(float_samples)) / max_amp
def audiosegment_to_numpy_array(audio_segment):
    """Decode an AudioSegment's raw PCM bytes into a (frames, channels)
    integer array.

    A 2-byte sample width is read as int16; any other width is read as
    signed 8-bit (the only two widths this module handles).
    """
    width = audio_segment.sample_width
    dtype = np.int16 if width == 2 else np.int8
    flat = np.frombuffer(audio_segment.raw_data, dtype=dtype)
    # One row per frame, one column per channel (interleaved PCM layout).
    return flat.reshape((-1, audio_segment.channels))
def numpy_array_to_audio_segment(samples, sample_width=2, frame_rate=44100, channels=2):
    """Build a pydub AudioSegment from a numpy sample array.

    float32 input is assumed to lie in [-1.0, 1.0] and is rescaled to
    int16 (forcing `sample_width` to 2).  A float32 array whose first
    axis has length 2 is treated as channel-major stereo, (2, n_frames),
    and is interleaved to frame-major (n_frames, 2) first.
    """
    if samples.dtype == np.float32:
        if samples.shape[0] == 2:
            # Interleave channel-major stereo to (n_frames, 2).  The
            # transpose is the vectorized equivalent of the original
            # per-sample `[[x, y] for x, y in zip(...)]` loop.
            samples = np.ascontiguousarray(samples.T)
        samples = (samples * 32767).astype(np.int16)
        sample_width = 2
    raw_data = samples.tobytes()
    # Wrap the raw PCM bytes in an AudioSegment with the given format.
    return AudioSegment(
        data=raw_data,
        sample_width=sample_width,
        frame_rate=frame_rate,
        channels=channels,
    )
def add_stereo(segments, seg_length, sample_rate):
    """Overlay every segment in `segments` into a single mix and return
    the result as a float32 sample array.

    `seg_length` is accepted for interface compatibility but unused.
    """
    seg_iter = iter(segments)
    mixed = numpy_array_to_audio_segment(next(seg_iter), frame_rate=sample_rate, sample_width=4)
    for seg in seg_iter:
        layer = numpy_array_to_audio_segment(seg, frame_rate=sample_rate, sample_width=4)
        mixed = mixed.overlay(layer)
    return int16_to_float32(audiosegment_to_numpy_array(mixed))
# Produce num_parts panned copies of the audio and measure each copy's
# average amplitude.
def split_stereo(segment, num_parts=100, max_amp=0, sr=44100):
    """Create `num_parts` copies of `segment` panned evenly from hard
    left (-1) to hard right (+1), returning their normalized average
    amplitudes and their float32 samples.

    Parameters
    ----------
    segment : numpy sample array accepted by numpy_array_to_audio_segment
    num_parts : number of panning positions spread across [-1, 1]
    max_amp : normalization factor for the amplitude measurement
              (0 yields amplitudes of 0)
    sr : sample rate in Hz

    Returns
    -------
    (avg_amplitudes, audios) where `audios` is a float32 array of shape
    (num_parts, n_frames, 2).
    """
    audio = numpy_array_to_audio_segment(segment, frame_rate=sr)
    # Guard num_parts == 1: the unguarded step formula 2 / (num_parts - 1)
    # divided by zero; a single part is simply panned center.
    pan_step = 2 / (num_parts - 1) if num_parts > 1 else 0.0
    avg_amplitudes = []
    audios = []
    for i in range(num_parts):
        pan_value = -1 + i * pan_step
        panned_audio = audio.pan(pan_value)
        # Raw PCM back to an interleaved stereo int16 array, then float32.
        samples = np.frombuffer(panned_audio.raw_data, dtype=np.int16).reshape((-1, 2))
        float_samples = int16_to_float32(samples)
        avg_amplitudes.append(calculate_average_amplitude(float_samples, max_amp))
        audios.append(float_samples)
    return avg_amplitudes, np.array(audios)