From bb766dbbc09fa0ea40b233da7a0577c2a36871a6 Mon Sep 17 00:00:00 2001 From: parth aranke Date: Thu, 6 Jun 2024 11:20:23 +0530 Subject: [PATCH] Fixing bug for part gains --- Utils.py | 70 +++++++++++++++++++++++++++++++------------------------ server.py | 52 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 89 insertions(+), 33 deletions(-) diff --git a/Utils.py b/Utils.py index 21feb5d..6a8d665 100644 --- a/Utils.py +++ b/Utils.py @@ -13,6 +13,8 @@ class ProceesAudio(): device = 'cuda' if torch.cuda.is_available() else 'cpu' + segments = [] + target_audio = [] def _split_audio_s(self, audio, sr, segment_length=0.5, overlap=0): @@ -38,6 +40,7 @@ class ProceesAudio(): segment = audio[start:start + segment_samples] if len(segment) == segment_samples: segments.append(segment) + return segments, sr def _calculate_average_amplitude(self, segments, sr, n_fft=2048, hop_length=256, num_frequency_bands=100): @@ -70,6 +73,40 @@ class ProceesAudio(): return audio + def change_amps(self, new_amps, sr, data): + modified_segs = [] + prev_gains = np.ones(10) + segments, sr = self._split_audio_s(data, sr=sr) + max_amp = 0 + part_amps = [] + for segment in segments: + max_amp = max(max_amp, np.max(np.abs(segment))) + for segment, mod in zip(segments, new_amps): + _, audios = split_stereo(segment=segment, max_amp=max_amp, sr=sr, num_parts=10) + final_audios = [] + curr_gains = [] + part_amp = [] + for audio, target_amp, i in zip(audios, mod, range(10)): + gain = (target_amp.item()/np.mean(np.abs(audio)))*max_amp + + if np.mean(np.abs(audio)) == 0: + gain=0 + elif gain <= 50 and gain >=1: + gain = gain/50 + else: + gain=1 + part_amp.append(gain) + audio = self.make_smooth(audio, gain, prev_gains[i]) + curr_gains.append(gain) + final_audios.append(audio) + part_amps.append(part_amp) + prev_gains = curr_gains + modified_seg = add_stereo(final_audios, len(final_audios[0]), sample_rate=sr) + modified_segs.append(modified_seg) + + modified_segs = np.concatenate(modified_segs) + return modified_segs, part_amps + def perform_modulation(self, data, sr, index): segments, sr = self._split_audio_s(data, sr=sr) max_amp = 0 @@ -81,37 +118,10 @@ class ProceesAudio(): index = int((int(index)/100)*len(x)) y = self._get_output_amps(x, index) - print(len(y), len(x)) - modified_segs = [] - prev_gains = np.ones(10) - for segment, mod in zip(segments, y[0]): - _, audios = split_stereo(segment=segment, max_amp=max_amp, sr=sr, num_parts=10) - final_audios = [] - curr_gains = [] - - for audio, target_amp, i in zip(audios, mod, range(10)): - gain = (target_amp.item()/np.mean(np.abs(audio)))*max_amp - - if np.mean(np.abs(audio)) == 0: - gain=0 - elif gain <= 50 and gain >=1: - gain = gain/50 - else: - gain=1 + + y[0][index+1:] *= max_amp - audio = self.make_smooth(audio, gain, prev_gains[i]) - curr_gains.append(gain) - final_audios.append(audio) - - prev_gains = curr_gains - modified_seg = add_stereo(final_audios, len(final_audios[0]), sample_rate=sr) - modified_segs.append(modified_seg) - - modified_segs[:index+1] = segments[:index+1] - - - modified_segs = np.concatenate(modified_segs) - return modified_segs.astype(np.float32), y[0] + return self.change_amps(new_amps=y[0], sr=sr, data=data) def get_training_data(self, file_path, data_dir): for song in os.listdir(data_dir): diff --git a/server.py b/server.py index 0b1217e..755d6d9 100644 --- a/server.py +++ b/server.py @@ -51,8 +51,7 @@ def upload_to_s3(processed_data, userId): def int16_to_float32(samples): return samples.astype(np.float32) / 32768.0 -def process_audio_bytes(audio_bytes, index): - # Read the audio file from bytes +def get_audio_data(audio_bytes): sample_rate, data = wavfile.read(io.BytesIO(audio_bytes)) left = [] right = [] @@ -61,6 +60,11 @@ def process_audio_bytes(audio_bytes, index): left.append(frame[0]) right.append(frame[1]) data = np.array([left, right], dtype=np.float32) + return sample_rate, data + +def process_audio_bytes(audio_bytes, index): + # Read the audio file from bytes + sample_rate, data = get_audio_data(audio_bytes=audio_bytes) pa = ProceesAudio() @@ -68,6 +72,8 @@ def process_audio_bytes(audio_bytes, index): file_url = upload_to_s3(processed_data=processed_data, userId="parth") arr_to_show = [] + print(len(processed_data)) + acc_factor = int(len(processed_data)/150) for i in range(0, len(processed_data), acc_factor): @@ -76,7 +82,14 @@ def process_audio_bytes(audio_bytes, index): for i in range(len(arr_to_show)): arr_to_show[i] = float(arr_to_show[i]) - return jsonify({"file_url": file_url, "array": arr_to_show, "part_amps": float(part_amps)}) + ret = [] + for i in range(10): + temp = [] + for part in part_amps: + temp.append(float(part[i])) + ret.append(temp) + + return jsonify({"file_url": "", "array": arr_to_show, "part_amps": ret}) @app.route('/modify', methods=['POST']) def modify(): @@ -95,6 +108,39 @@ def modify(): response.headers.add('Access-Control-Allow-Origin', '*') return response +@app.route('/manMod', methods=['POST']) +def manual_modify(): + if 'song' not in request.files: + return 'No file part', 400 + file = request.files['song'] + index = request.form.get('index') # Assuming index is sent as a form field + mod_arr = request.form.get('mod_arr') + if file.filename == '': + return 'No selected file', 400 + if file: + # Read file bytes + file_bytes = file.read() + # Process the audio bytes with the index parameter + sample_rate, data = get_audio_data(audio_bytes=file_bytes) + pa = ProceesAudio() + processed_data = pa.change_amps(data=data, new_amps=mod_arr, sr=sample_rate) + + file_url = upload_to_s3(processed_data=processed_data, userId="parth") + arr_to_show = [] + + acc_factor = int(len(processed_data)/150) + + for i in range(0, len(processed_data), acc_factor): + arr_to_show.append(np.mean(np.abs(processed_data[i:i+acc_factor]))) + + for i in range(len(arr_to_show)): + arr_to_show[i] = float(arr_to_show[i]) + + response = jsonify({"file_url": file_url, "array": arr_to_show}) + + response.headers.add('Access-Control-Allow-Origin', '*') + return response + if __name__ == '__main__': app.run(debug=True, port=80)