Finalizing server
This commit is contained in:
parent
2095d0f5ff
commit
41ec940ac8
118
Utils.py
Normal file
118
Utils.py
Normal file
@ -0,0 +1,118 @@
|
||||
import librosa
|
||||
import numpy as np
|
||||
import torch
|
||||
import os
|
||||
from scipy.io.wavfile import write, read
|
||||
from model import BigramLanguageModel
|
||||
from divideStereo import split_stereo, add_stereo
|
||||
|
||||
class ProceesAudio():
    """Segment stereo audio and rescale its stereo parts with a trained model.

    The pipeline implemented by :meth:`perform_modulation` is:

    1. cut the audio into fixed-length segments,
    2. describe every segment with the amplitudes returned by
       ``split_stereo``,
    3. let ``BigramLanguageModel`` generate target amplitudes,
    4. apply a smoothed per-part gain and rebuild each segment with
       ``add_stereo``.

    The (misspelled) class name is kept because other modules import it
    under this name.
    """

    # Class-level defaults kept for backward compatibility; every instance
    # gets its own lists in __init__ so state is not shared between objects.
    audio_data = []

    final_audio = []

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    target_audio = []

    # Number of parts requested from split_stereo for every segment.
    _NUM_PARTS = 10

    def __init__(self):
        """Give the instance private buffers instead of the shared class lists."""
        self.audio_data = []
        self.final_audio = []
        self.target_audio = []

    @staticmethod
    def _segment_starts(total_samples, segment_samples, overlap_samples):
        """Return the start offsets of every full-length segment.

        Raises:
            ValueError: if a segment would hold no samples or the hop between
                segments would not be positive (overlap >= 1).
        """
        step = segment_samples - overlap_samples
        if segment_samples <= 0 or step <= 0:
            raise ValueError(
                "segment_length must cover at least one sample and overlap "
                "must be smaller than 1"
            )
        # Stopping at total - segment + 1 drops the trailing partial segment.
        return range(0, total_samples - segment_samples + 1, step)

    @staticmethod
    def _peak_amplitude(segments):
        """Return the largest absolute sample value over all segments (0 if none)."""
        peak = 0
        for segment in segments:
            peak = max(peak, np.max(np.abs(segment)))
        return peak

    def _split_audio_s(self, audio, sr, segment_length=0.5, overlap=0):
        """Split multi-channel audio into equally sized segments.

        Args:
            audio: 2-D array sliced along its second axis (channels are kept).
            sr: sample rate in Hz.
            segment_length: segment duration in seconds.
            overlap: fraction of a segment shared with the next one.

        Returns:
            ``(segments, sr)`` where ``segments`` is a list of full-length
            slices; a trailing partial segment is discarded.

        Raises:
            ValueError: if the segment is empty or ``overlap`` is >= 1.
        """
        segment_samples = int(segment_length * sr)
        overlap_samples = int(segment_samples * overlap)

        starts = self._segment_starts(audio.shape[1], segment_samples, overlap_samples)
        segments = [audio[:, start:start + segment_samples] for start in starts]

        return segments, sr

    def _split_audio(self, file_path, segment_length=0.1, overlap=0):
        """Load an audio file with librosa and split it into segments.

        The file is loaded with ``sr=None`` and sliced along its first axis.

        Args:
            file_path: path handed to ``librosa.load``.
            segment_length: segment duration in seconds.
            overlap: fraction of a segment shared with the next one.

        Returns:
            ``(segments, sr)``; a trailing partial segment is discarded.

        Raises:
            ValueError: if the segment is empty or ``overlap`` is >= 1.
        """
        audio, sr = librosa.load(file_path, sr=None)
        segment_samples = int(segment_length * sr)
        overlap_samples = int(segment_samples * overlap)

        starts = self._segment_starts(len(audio), segment_samples, overlap_samples)
        segments = [audio[start:start + segment_samples] for start in starts]
        return segments, sr

    def _calculate_average_amplitude(self, segments, sr, n_fft=2048, hop_length=256, num_frequency_bands=100):
        """Collect the amplitudes ``split_stereo`` reports for every segment.

        Args:
            segments: iterable of audio segments.
            sr: sample rate forwarded to ``split_stereo``.
            n_fft, hop_length, num_frequency_bands: unused; kept so existing
                callers that pass them keep working.

        Returns:
            ``(amps, audios)`` where ``amps`` holds one entry per segment and
            ``audios`` is always an empty list.
        """
        # All segments are normalised against the same global peak.
        max_amp = self._peak_amplitude(segments)

        ret = []
        for segment in segments:
            amps, _ = split_stereo(segment=segment, max_amp=max_amp, sr=sr, num_parts=self._NUM_PARTS)
            ret.append(amps)

        audios = []
        return ret, audios

    def _get_output_amps(self, input_amps, index):
        """Generate target amplitudes from the first ``index + 1`` segments.

        The model weights are read from ``amp_net.pth`` (relative path) on
        every call.

        Args:
            input_amps: per-segment amplitudes; all entries are expected to
                have the length of ``input_amps[0]``.
            index: last segment (inclusive) used as generation context.

        Returns:
            Whatever ``BigramLanguageModel.generate`` returns for the context.
        """
        model = BigramLanguageModel()
        model.to("cpu", dtype=float)
        model.load_state_dict(torch.load('amp_net.pth', map_location=torch.device('cpu')))

        # The model and its weights live on the CPU, so the context must stay
        # there too; moving it to self.device breaks on CUDA hosts.
        # NOTE(review): the model is float64 (dtype=float); this assumes the
        # amplitudes convert to a compatible dtype - confirm.
        context = torch.tensor(input_amps[:index + 1]).view(1, index + 1, len(input_amps[0])).to("cpu")

        # NOTE(review): the requested length is len - index + 1; presumably
        # generate() returns context plus new steps - verify against model.py.
        with torch.no_grad():  # inference only, no autograd graph needed
            return model.generate(context, len(input_amps) - index + 1)

    def make_smooth(self, audio, gain, prev_gain):
        """Apply ``gain`` to ``audio`` in place, ramping in from ``prev_gain``.

        The first 1000 samples (or the whole array when it is shorter) are
        multiplied by a linear ramp from ``prev_gain`` to ``gain``; the rest
        is multiplied by ``gain``.

        Args:
            audio: NumPy array whose first axis is the sample axis.
            gain: gain reached at the end of the ramp.
            prev_gain: gain the ramp starts from.

        Returns:
            The same ``audio`` array, modified in place.
        """
        smooth_index = 1000
        # Clamp so clips shorter than the ramp no longer raise IndexError.
        ramp_len = min(smooth_index, len(audio))
        ramp = np.linspace(prev_gain, gain, num=ramp_len)
        # Let the ramp broadcast over any trailing axes.
        ramp = ramp.reshape((ramp_len,) + (1,) * (np.ndim(audio) - 1))

        # Slice assignment keeps the element-wise cast of the original loop.
        audio[:ramp_len] = audio[:ramp_len] * ramp
        audio[smooth_index:] *= gain
        return audio

    def perform_modulation(self, data, sr, index):
        """Run the full pipeline on stereo ``data`` and return the result.

        Args:
            data: 2-D array that is segmented along its second axis.
            sr: sample rate in Hz.
            index: last segment (inclusive) used as model context.

        Returns:
            The processed segments concatenated into one ``float32`` array.

        Raises:
            ValueError: if ``data`` is shorter than one segment.
        """
        segments, sr = self._split_audio_s(data, sr=sr)
        if not segments:
            raise ValueError("audio is shorter than one segment")

        max_amp = self._peak_amplitude(segments)

        x, _ = self._calculate_average_amplitude(segments=segments, sr=sr)
        y = self._get_output_amps(x, index)

        modified_segs = []
        # Every part starts from unity gain in the first segment.
        prev_gains = np.ones(self._NUM_PARTS)
        for segment, mod in zip(segments, y[0]):
            _, audios = split_stereo(segment=segment, max_amp=max_amp, sr=sr, num_parts=self._NUM_PARTS)
            final_audios = []
            curr_gains = []

            for audio, target_amp, i in zip(audios, mod, range(self._NUM_PARTS)):
                level = np.mean(np.abs(audio))

                if level == 0:
                    # Silent part: nothing to scale, and avoids dividing by 0.
                    gain = 0
                else:
                    gain = (target_amp.item() / level) * max_amp
                    # Raw gains up to 50 are scaled by 1/50; larger ones
                    # saturate at 1.
                    gain = gain / 50 if gain <= 50 else 1

                audio = self.make_smooth(audio, gain, prev_gains[i])
                curr_gains.append(gain)
                final_audios.append(audio)

            # The next segment ramps from the gains just applied.
            prev_gains = curr_gains
            modified_seg = add_stereo(final_audios, len(final_audios[0]), sample_rate=sr)
            modified_segs.append(modified_seg)

        modified_segs = np.concatenate(modified_segs)
        return modified_segs.astype(np.float32)

    def get_training_data(self, file_path, data_dir):
        """Write one ``<song>.data.pt`` tensor file per entry of ``data_dir``.

        Args:
            file_path: directory the tensor files are saved into.
            data_dir: directory whose entries are processed one by one.
        """
        for song in os.listdir(data_dir):
            self.audio_data = []
            # NOTE(review): _process_main is not defined in this class as
            # shown; presumably it fills self.audio_data - confirm.
            self._process_main(os.path.join(data_dir, song))
            audio_data = np.array(self.audio_data)
            tensor_data = torch.tensor(audio_data, dtype=torch.float32)
            torch.save(tensor_data, os.path.join(file_path, song + '.data.pt'))
|
||||
|
@ -2,8 +2,7 @@ from pydub import AudioSegment
|
||||
import numpy as np
|
||||
from scipy.io.wavfile import write
|
||||
import time
|
||||
# Load the audio file
|
||||
audio = AudioSegment.from_file("sitare.wav")
|
||||
|
||||
|
||||
# Ensure the audio is stereo
|
||||
if audio.channels != 2:
|
||||
|
99
server.py
Normal file
99
server.py
Normal file
@ -0,0 +1,99 @@
|
||||
from flask import Flask, request, send_file, jsonify
|
||||
import io
|
||||
from scipy.io import wavfile
|
||||
import numpy as np
|
||||
import librosa
|
||||
from Utils import ProceesAudio
|
||||
import pyrebase
|
||||
import soundfile as sf
|
||||
import datetime
|
||||
from flask_jwt_extended import JWTManager, jwt_required, create_access_token, get_jwt_identity
|
||||
|
||||
app = Flask(__name__)
|
||||
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024 # 100 MB limit
|
||||
|
||||
|
||||
# Firebase configuration
|
||||
firebase_config = {
|
||||
"apiKey": "AIzaSyBqDZlqD7UOBvt2zsk9OLWKH1Lc3_f_VJM",
|
||||
"authDomain": "modifier-4088b.firebaseapp.com",
|
||||
"projectId": "modifier-4088b",
|
||||
"storageBucket": "modifier-4088b.appspot.com",
|
||||
"messagingSenderId": "237119475630",
|
||||
"appId": "1:237119475630:web:6c96c38c61285f5fcb823f",
|
||||
"measurementId": "G-6CWLQMT2Q3",
|
||||
"databaseURL": "https://modifier-4088b.firebaseio.com",
|
||||
}
|
||||
|
||||
firebase = pyrebase.initialize_app(firebase_config)
|
||||
storage = firebase.storage()
|
||||
|
||||
# Plain helper, deliberately not registered as a Flask route: it requires
# arguments that a route without URL parameters can never supply.
def upload_to_firebase(processed_data, userId, sample_rate=44100):
    """Encode audio as WAV in memory, upload it to Firebase Storage, return its URL.

    Args:
        processed_data: sample array handed to ``soundfile.write``.
        userId: identifier embedded in the storage path.
        sample_rate: sample rate written into the WAV header; defaults to
            44100 for callers that do not pass one.

    Returns:
        The URL reported by Firebase Storage for the uploaded object.
    """
    # Create an in-memory bytes buffer
    buffer = io.BytesIO()

    # Write processed data to the buffer as a WAV file
    sf.write(buffer, processed_data, sample_rate, format='WAV')
    buffer.seek(0)  # Rewind the buffer

    # Upload the buffer to Firebase Storage
    storage_path = f'uploads/processed_audio_{userId}.wav'
    # NOTE(review): pyrebase's put() appears to take (file, token); the file
    # name passed here is presumably sent as an auth token - confirm.
    storage.child(storage_path).put(buffer, f'processed_audio_{userId}.wav')

    # Get the URL of the uploaded file
    file_url = storage.child(storage_path).get_url(None)

    return file_url


def int16_to_float32(samples):
    """Scale 16-bit PCM samples to ``float32`` by dividing by 32768.

    Args:
        samples: NumPy array or any array-like of int16 sample values.

    Returns:
        A ``float32`` array of the same shape.
    """
    return np.asarray(samples).astype(np.float32) / 32768.0


def process_audio_bytes(audio_bytes):
    """Process an uploaded stereo WAV and describe the result as JSON.

    The audio is modulated with ``ProceesAudio.perform_modulation``, uploaded
    through ``upload_to_firebase`` and summarised as roughly 150 mean
    absolute amplitude values.

    Args:
        audio_bytes: raw content of a WAV file.

    Returns:
        A Flask JSON response with the keys ``file_url`` and ``array``.

    Raises:
        ValueError: if the file has fewer than two channels (or is not a
            WAV file that SciPy can read).
    """
    # Read the audio file from bytes
    sample_rate, data = wavfile.read(io.BytesIO(audio_bytes))
    if data.ndim != 2 or data.shape[1] < 2:
        raise ValueError("Expected a stereo WAV file (at least two channels)")

    # (frames, channels) -> (2, frames); only the first two channels are used.
    # NOTE(review): samples are scaled as int16 whatever dtype the file has.
    data = np.ascontiguousarray(int16_to_float32(data[:, :2]).T)

    pa = ProceesAudio()

    processed_data = pa.perform_modulation(data=data, sr=sample_rate, index=0)
    # Keep the source sample rate so the upload plays at the original speed.
    file_url = upload_to_firebase(processed_data=processed_data, userId="parth", sample_rate=sample_rate)

    # At least 1, otherwise range() fails for outputs shorter than 150 items.
    acc_factor = max(1, len(processed_data) // 150)
    arr_to_show = [
        float(np.mean(np.abs(processed_data[i:i + acc_factor])))
        for i in range(0, len(processed_data), acc_factor)
    ]

    return jsonify({"file_url": file_url, "array": arr_to_show})
|
||||
|
||||
@app.route('/modify', methods=['POST'])
def modify():
    """Handle ``POST /modify``: process the uploaded ``song`` file.

    Returns:
        The JSON response built by ``process_audio_bytes`` with a permissive
        CORS header, or a plain-text 400 response when the upload is
        missing, unnamed or not usable audio.
    """
    if 'song' not in request.files:
        return 'No file part', 400
    file = request.files['song']
    if file.filename == '':
        return 'No selected file', 400

    try:
        # Read file bytes and process the audio
        response = process_audio_bytes(file.read())
    except ValueError:
        # Malformed or unsupported audio is a client error, not a server crash.
        return 'Invalid audio file', 400

    response.headers.add('Access-Control-Allow-Origin', '*')
    return response
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# NOTE(review): debug=True turns on Flask's debug mode (interactive debugger and reloader); confirm this entry point is never used for a public deployment.
app.run(debug=True, port=8000)
|
Loading…
Reference in New Issue
Block a user