Finalizing server

2024-06-01 10:10:38 +05:30 · 2024-06-01 10:10:38 +05:30 · 41ec940ac8
commit 41ec940ac8
parent 2095d0f5ff
3 changed files with 218 additions and 2 deletions
--- a/Utils.py
+++ b/Utils.py
@ -0,0 +1,118 @@
+import librosa
+import numpy as np
+import torch
+import os
+from scipy.io.wavfile import write, read
+from model import BigramLanguageModel
+from divideStereo import split_stereo, add_stereo
+
+class ProceesAudio():
+    """Segment audio and rescale each segment with model-predicted gains.
+
+    ``perform_modulation`` is the inference entry point: it cuts a
+    channel-first array into fixed-length segments, asks
+    ``BigramLanguageModel`` (weights read from ``amp_net.pth``) for target
+    amplitudes and applies smoothed per-part gains to every segment.
+    """
+
+    # NOTE: class-level attributes. The list objects are shared by every
+    # instance until an instance rebinds them (get_training_data does so for
+    # ``audio_data``).
+    audio_data = []
+
+    final_audio = []
+
+    # Preferred torch device. Inference in _get_output_amps runs on the CPU
+    # regardless, because the model is explicitly placed there.
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+    target_audio = []
+
+    # Number of parts requested from split_stereo for every segment.
+    _NUM_PARTS = 10
+
+    @staticmethod
+    def _segment_starts(total_samples, segment_samples, overlap_samples):
+        """Return the start offsets of every full-length segment.
+
+        Raises:
+            ValueError: if the segment is empty or the hop between
+                consecutive segments is not positive (overlap >= 1).
+        """
+        step = segment_samples - overlap_samples
+        if segment_samples <= 0 or step <= 0:
+            raise ValueError(
+                'segment_length must cover at least one sample and overlap '
+                'must be smaller than 1'
+            )
+        # Stopping at total - segment + 1 yields only starts whose segment
+        # fits entirely, so a trailing partial segment is dropped.
+        return range(0, total_samples - segment_samples + 1, step)
+
+    @staticmethod
+    def _peak_amplitude(segments):
+        """Return the largest absolute sample value found in ``segments``.
+
+        Returns 0 when ``segments`` is empty.
+        """
+        peak = 0
+        for segment in segments:
+            peak = max(peak, np.max(np.abs(segment)))
+        return peak
+
+    def _split_audio_s(self, audio, sr, segment_length=0.5, overlap=0):
+        """Split a channel-first array into equal-length segments.
+
+        Args:
+            audio: array indexed as ``audio[channel, sample]``.
+            sr: sample rate in Hz.
+            segment_length: segment duration in seconds.
+            overlap: fraction of a segment shared with its successor.
+
+        Returns:
+            ``(segments, sr)``; a trailing partial segment is discarded.
+
+        Raises:
+            ValueError: if the resulting hop size is not positive.
+        """
+        segment_samples = int(segment_length * sr)
+        overlap_samples = int(segment_samples * overlap)
+        starts = self._segment_starts(
+            audio.shape[1], segment_samples, overlap_samples
+        )
+        # Slice along the sample axis so every channel is kept.
+        segments = [audio[:, start:start + segment_samples] for start in starts]
+        return segments, sr
+
+    def _split_audio(self, file_path, segment_length=0.1, overlap=0):
+        """Load ``file_path`` with librosa and split it into equal segments.
+
+        The file is loaded at its native sample rate (``sr=None``).
+
+        Returns:
+            ``(segments, sr)``; a trailing partial segment is discarded.
+
+        Raises:
+            ValueError: if the resulting hop size is not positive.
+        """
+        audio, sr = librosa.load(file_path, sr=None)
+        segment_samples = int(segment_length * sr)
+        overlap_samples = int(segment_samples * overlap)
+        starts = self._segment_starts(
+            len(audio), segment_samples, overlap_samples
+        )
+        segments = [audio[start:start + segment_samples] for start in starts]
+        return segments, sr
+
+    def _calculate_average_amplitude(self, segments, sr, n_fft=2048, hop_length=256, num_frequency_bands=100):
+        """Collect the amplitudes ``split_stereo`` reports for every segment.
+
+        ``n_fft``, ``hop_length`` and ``num_frequency_bands`` are accepted
+        for interface compatibility but are not used.
+
+        Returns:
+            ``(amps_per_segment, audios)`` where ``audios`` is always an
+            empty list.
+        """
+        audios = []
+        # Every segment is passed the same global peak as ``max_amp``.
+        max_amp = self._peak_amplitude(segments)
+        ret = [
+            split_stereo(segment=segment, max_amp=max_amp, sr=sr,
+                         num_parts=self._NUM_PARTS)[0]
+            for segment in segments
+        ]
+        return ret, audios
+
+    def _get_output_amps(self, input_amps, index):
+        """Run the amplitude model on the first ``index + 1`` entries.
+
+        Args:
+            input_amps: per-segment amplitude vectors.
+            index: last position of ``input_amps`` used as the prompt.
+
+        Returns:
+            Whatever ``BigramLanguageModel.generate`` returns for the prompt
+            and ``len(input_amps) - index + 1`` requested steps.
+        """
+        # The model is explicitly placed on the CPU, so the prompt has to
+        # stay there as well; moving it to ``self.device`` mixed devices on
+        # hosts where CUDA is available.
+        cpu = torch.device('cpu')
+        model = BigramLanguageModel()
+        model.to(cpu, dtype=float)
+        model.load_state_dict(torch.load('amp_net.pth', map_location=cpu))
+        prompt = torch.tensor(input_amps[:index + 1], device=cpu)
+        prompt = prompt.view(1, index + 1, len(input_amps[0]))
+        return model.generate(prompt, len(input_amps) - index + 1)
+
+    def make_smooth(self, audio, gain, prev_gain):
+        """Scale ``audio`` in place, ramping from ``prev_gain`` to ``gain``.
+
+        The first 1000 samples are multiplied by a linear ramp between the
+        two gains and the remainder by ``gain``. Buffers shorter than the
+        ramp only receive the leading part of the ramp.
+
+        Returns:
+            The same ``audio`` array, modified in place.
+        """
+        smooth_index = 1000
+        ramp = np.linspace(prev_gain, gain, num=smooth_index)
+        ramp_len = min(smooth_index, len(audio))
+        # Reshape so the ramp broadcasts along the first axis for any rank.
+        ramp = ramp[:ramp_len].reshape((ramp_len,) + (1,) * (audio.ndim - 1))
+        audio[:ramp_len] *= ramp
+        audio[smooth_index:] *= gain
+        return audio
+
+    def perform_modulation(self, data, sr, index):
+        """Apply model-predicted gains to ``data`` segment by segment.
+
+        Args:
+            data: channel-first array indexed as ``data[channel, sample]``.
+            sr: sample rate in Hz.
+            index: prompt position forwarded to ``_get_output_amps``.
+
+        Returns:
+            The concatenated modified segments as ``float32``.
+
+        Raises:
+            ValueError: if ``data`` is shorter than one 0.5 s segment.
+        """
+        segments, sr = self._split_audio_s(data, sr=sr)
+        if not segments:
+            raise ValueError('audio is shorter than one segment')
+        max_amp = self._peak_amplitude(segments)
+
+        x, _ = self._calculate_average_amplitude(segments=segments, sr=sr)
+        y = self._get_output_amps(x, index)
+        modified_segs = []
+        # Every part starts from unity gain for the first segment.
+        prev_gains = np.ones(self._NUM_PARTS)
+        for segment, mod in zip(segments, y[0]):
+            _, audios = split_stereo(segment=segment, max_amp=max_amp, sr=sr,
+                                     num_parts=self._NUM_PARTS)
+            final_audios = []
+            curr_gains = []
+
+            for audio, target_amp, prev_gain in zip(audios, mod, prev_gains):
+                mean_amp = np.mean(np.abs(audio))
+                if mean_amp == 0:
+                    # Silent part: nothing to scale, and dividing by the
+                    # mean would produce inf/nan.
+                    gain = 0
+                else:
+                    gain = (target_amp.item() / mean_amp) * max_amp
+                    # Map the raw ratio to a factor of at most 1.
+                    gain = gain / 50 if gain <= 50 else 1
+
+                audio = self.make_smooth(audio, gain, prev_gain)
+                curr_gains.append(gain)
+                final_audios.append(audio)
+
+            prev_gains = curr_gains
+            modified_seg = add_stereo(final_audios, len(final_audios[0]), sample_rate=sr)
+            modified_segs.append(modified_seg)
+
+        return np.concatenate(modified_segs).astype(np.float32)
+
+    def get_training_data(self,  file_path, data_dir):
+        """Save one ``<song>.data.pt`` tensor per file found in ``data_dir``.
+
+        Args:
+            file_path: directory the tensors are written to.
+            data_dir: directory whose entries are processed one by one.
+        """
+        for song in os.listdir(data_dir):
+            self.audio_data = []
+            # NOTE(review): _process_main is not defined in the visible part
+            # of this class; presumably it fills self.audio_data — confirm.
+            self._process_main(os.path.join(data_dir, song))
+            audio_data = np.array(self.audio_data)
+            tensor_data = torch.tensor(audio_data, dtype=torch.float32)
+            torch.save(tensor_data, os.path.join(file_path, song + '.data.pt'))
+
--- a/divideStereo.py
+++ b/divideStereo.py
@ -2,8 +2,7 @@ from pydub import AudioSegment
 import numpy as np
 from scipy.io.wavfile import write
 import time
-# Load the audio file
-audio = AudioSegment.from_file("sitare.wav")
+

 # Ensure the audio is stereo
 if audio.channels != 2:
--- a/server.py
+++ b/server.py
@ -0,0 +1,99 @@
+from flask import Flask, request, send_file, jsonify
+import io
+from scipy.io import wavfile
+import numpy as np
+import librosa
+from Utils import ProceesAudio
+import pyrebase
+import soundfile as sf
+import datetime
+from flask_jwt_extended import JWTManager, jwt_required, create_access_token, get_jwt_identity
+
+app = Flask(__name__)
+# Cap the size of incoming request bodies at 100 * 1024 * 1024 bytes.
+app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100 MiB limit
+
+
+# Firebase configuration passed to pyrebase.initialize_app below.
+# NOTE(review): these project identifiers are committed in source; consider
+# loading them from the environment or an untracked config file — confirm
+# with the project owner.
+firebase_config = {
+  "apiKey": "AIzaSyBqDZlqD7UOBvt2zsk9OLWKH1Lc3_f_VJM",
+  "authDomain": "modifier-4088b.firebaseapp.com",
+  "projectId": "modifier-4088b",
+  "storageBucket": "modifier-4088b.appspot.com",
+  "messagingSenderId": "237119475630",
+  "appId": "1:237119475630:web:6c96c38c61285f5fcb823f",
+  "measurementId": "G-6CWLQMT2Q3",
+  "databaseURL": "https://modifier-4088b.firebaseio.com",
+}
+
+# Module-level handles: the storage client is used by upload_to_firebase.
+firebase = pyrebase.initialize_app(firebase_config)
+storage = firebase.storage()
+
+# NOTE: upload_to_firebase is a plain helper, not a view. It used to be
+# registered as the POST /process_and_upload handler, but Flask calls views
+# without positional arguments, so every request to that URL failed.
+def upload_to_firebase(processed_data, userId, sample_rate=44100):
+    """Encode ``processed_data`` as WAV in memory, upload it, return its URL.
+
+    Args:
+        processed_data: sample array handed to ``soundfile.write``.
+        userId: identifier embedded in the storage path.
+        sample_rate: rate written to the WAV header; defaults to 44100 for
+            callers that do not pass it.
+
+    Returns:
+        The URL reported by Firebase Storage for the uploaded object.
+    """
+    # Write the WAV into an in-memory buffer, then rewind it for the upload.
+    buffer = io.BytesIO()
+    sf.write(buffer, processed_data, sample_rate, format='WAV')
+    buffer.seek(0)
+
+    storage_path = f'uploads/processed_audio_{userId}.wav'
+    # NOTE(review): confirm against the pyrebase documentation what the
+    # second positional argument of put() is expected to be.
+    storage.child(storage_path).put(buffer, f'processed_audio_{userId}.wav')
+
+    return storage.child(storage_path).get_url(None)
+
+
+def int16_to_float32(samples):
+    """Scale 16-bit PCM samples to ``float32`` by dividing by 32768."""
+    return samples.astype(np.float32) / 32768.0
+
+
+def _pcm_to_float32(samples):
+    """Convert samples returned by ``wavfile.read`` to ``float32``.
+
+    Signed integers are divided by the magnitude of their most negative
+    value, unsigned 8-bit data is re-centred around zero first, and
+    floating-point data is only cast.
+
+    Raises:
+        ValueError: for any other dtype.
+    """
+    dtype = samples.dtype
+    if dtype == np.int16:
+        return int16_to_float32(samples)
+    if np.issubdtype(dtype, np.floating):
+        return samples.astype(np.float32)
+    if dtype == np.uint8:
+        return (samples.astype(np.float32) - 128.0) / 128.0
+    if np.issubdtype(dtype, np.signedinteger):
+        full_scale = float(-int(np.iinfo(dtype).min))
+        return (samples / full_scale).astype(np.float32)
+    raise ValueError(f'Unsupported WAV sample type: {dtype}')
+
+
+def process_audio_bytes(audio_bytes):
+    """Modulate an uploaded WAV file and describe the result as JSON.
+
+    Args:
+        audio_bytes: raw bytes of a WAV file with at least two channels.
+
+    Returns:
+        A Flask JSON response holding ``file_url`` (the uploaded result) and
+        ``array`` (mean absolute amplitude of roughly 150 consecutive
+        chunks of the processed audio).
+
+    Raises:
+        ValueError: if the bytes are not a readable WAV file, the audio has
+            fewer than two channels, or the sample type is unsupported.
+    """
+    sample_rate, data = wavfile.read(io.BytesIO(audio_bytes))
+    if data.ndim != 2 or data.shape[1] < 2:
+        raise ValueError('Expected a WAV file with at least two channels')
+
+    # Channel-first (2, n_samples) layout built from the first two channels,
+    # i.e. the array the former per-frame loop produced.
+    data = np.ascontiguousarray(_pcm_to_float32(data[:, :2]).T)
+
+    pa = ProceesAudio()
+
+    processed_data = pa.perform_modulation(data=data, sr=sample_rate, index=0)
+    # Pass the input rate on so the WAV header matches the processed audio
+    # (assumes perform_modulation keeps the sample rate — it forwards ``sr``
+    # unchanged to its helpers).
+    file_url = upload_to_firebase(
+        processed_data=processed_data, userId="parth", sample_rate=sample_rate
+    )
+
+    # Chunk size for the preview; at least 1 so short outputs cannot yield
+    # a zero step for range().
+    acc_factor = max(1, len(processed_data) // 150)
+    arr_to_show = [
+        float(np.mean(np.abs(processed_data[i:i + acc_factor])))
+        for i in range(0, len(processed_data), acc_factor)
+    ]
+
+    return jsonify({"file_url": file_url, "array": arr_to_show})
+
+@app.route('/modify', methods=['POST'])
+def modify():
+    """Handle POST /modify: process the uploaded ``song`` file.
+
+    Returns:
+        The JSON response built by ``process_audio_bytes`` with a permissive
+        CORS header, or a plain-text 400 response when the upload is
+        missing, unnamed, or cannot be processed as audio.
+    """
+    if 'song' not in request.files:
+        return 'No file part', 400
+    file = request.files['song']
+    if file.filename == '':
+        return 'No selected file', 400
+
+    try:
+        response = process_audio_bytes(file.read())
+    except ValueError as exc:
+        # scipy's WAV reader raises ValueError for files it cannot parse;
+        # report that as a client error instead of an unhandled 500.
+        app.logger.warning('Rejected upload %r: %s', file.filename, exc)
+        return 'Invalid audio file', 400
+    response.headers.add('Access-Control-Allow-Origin', '*')
+    return response
+        
+
+# Start Flask's built-in server on port 8000 when this file is run directly.
+# NOTE(review): debug=True turns on Flask's debug mode; presumably meant for
+# local development only — confirm before deploying.
+if __name__ == '__main__':
+    app.run(debug=True, port=8000)