# Source listing metadata: 111 lines, 3.3 KiB, Python
from flask import Flask, request, jsonify, render_template
|
|
import librosa
|
|
import numpy as np
|
|
import tempfile
|
|
import os
|
|
|
|
# Flask application object; the route handlers in this module register on it.
app = Flask(__name__)

# Upload size cap for incoming request bodies: 100 MB limit.
app.config.update(MAX_CONTENT_LENGTH=100 * 1024 * 1024)

# Lower-case file suffixes (leading dot included) accepted by /analyze.
ALLOWED_EXTENSIONS = {
    '.mp3',
    '.wav',
    '.ogg',
    '.flac',
    '.m4a',
}
|
|
|
|
|
|
@app.route('/')
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
|
|
|
|
|
|
@app.route('/analyze', methods=['POST'])
def analyze():
    """Analyze an uploaded audio file and return the result as JSON.

    The upload is read from the multipart form field ``file``. Its suffix
    must be one of ``ALLOWED_EXTENSIONS``. The data is written to a temporary
    file, handed to ``analyze_audio``, and the temporary file is removed
    afterwards regardless of the outcome.

    Returns:
        The JSON-encoded ``analyze_audio`` result on success; a JSON
        ``{'error': ...}`` payload with status 400 when the upload is missing
        or has an unsupported suffix, or with status 500 when saving or
        analysis raises.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    upload = request.files['file']
    if not upload or upload.filename == '':
        return jsonify({'error': 'No file selected'}), 400

    ext = os.path.splitext(upload.filename)[1].lower()
    if ext not in ALLOWED_EXTENSIONS:
        # Sort so the message is stable; set iteration order is arbitrary.
        allowed = ", ".join(sorted(ALLOWED_EXTENSIONS))
        return jsonify({'error': f'Unsupported file type. Allowed: {allowed}'}), 400

    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp:
            # Record the path before writing so the finally-clause can clean
            # up even if saving the upload fails part-way through.
            tmp_path = tmp.name
            # Write through the already-open handle rather than re-opening
            # the path (re-opening an open temp file fails on Windows).
            upload.save(tmp)

        # The handle is closed (and flushed) here, so the analyzer reads a
        # complete file.
        return jsonify(analyze_audio(tmp_path))
    except Exception as e:
        # Top-level request boundary: keep the traceback in the server log
        # and report the failure to the client as JSON.
        app.logger.exception('Audio analysis failed')
        return jsonify({'error': f'Analysis failed: {str(e)}'}), 500
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except FileNotFoundError:
                # Already gone; nothing left to clean up.
                pass
|
|
|
|
|
|
def analyze_audio(file_path, max_points=20000):
    """Turn an audio file into a sparse time/frequency/amplitude point set.

    The file is loaded as mono at 22050 Hz and transformed with a short-time
    Fourier transform (``n_fft=2048``, ``hop_length=512``). Only the loudest
    quarter of the spectrogram cells is kept, and if that is still more than
    ``max_points`` cells, only the ``max_points`` largest magnitudes survive.

    Args:
        file_path: Path of the audio file to load with ``librosa.load``.
        max_points: Upper bound on the number of points returned; must be
            at least 1.

    Returns:
        A dict with parallel lists ``t`` (seconds), ``f`` (Hz) and ``a``
        (STFT magnitude); their per-list normalized counterparts ``tn``,
        ``fn`` and ``an``; and the metadata entries ``duration`` (timestamp
        of the last STFT frame, in seconds), ``sampleRate`` and
        ``pointCount``.

    Raises:
        ValueError: If ``max_points`` is smaller than 1 or the file decodes
            to zero samples.
    """
    # Reject this up front: with 0 the slice ``[-0:]`` below would select
    # every point instead of none, and negatives are meaningless.
    if max_points < 1:
        raise ValueError('max_points must be a positive integer')

    # Load audio as mono at 22050 Hz
    y, sr = librosa.load(file_path, mono=True, sr=22050)
    if y.size == 0:
        raise ValueError('Audio file contains no samples')

    n_fft = 2048
    hop_length = 512

    # Short-Time Fourier Transform
    S = librosa.stft(y, n_fft=n_fft, hop_length=hop_length)
    magnitude = np.abs(S)  # shape: (n_freq_bins, n_frames)

    times = librosa.frames_to_time(
        np.arange(magnitude.shape[1]), sr=sr, hop_length=hop_length
    )
    freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    # Row-major flattening: flat index k maps to (freq_bin, frame) through
    # np.unravel_index(k, magnitude.shape).
    all_mag = magnitude.ravel()

    # Remove noise floor — keep top 25% loudest points
    threshold = np.percentile(all_mag, 75)
    keep = np.flatnonzero(all_mag >= threshold)

    # If still too many, keep top N by magnitude
    if keep.size > max_points:
        top_idx = np.argpartition(all_mag[keep], -max_points)[-max_points:]
        keep = keep[top_idx]

    # Look up coordinates only for the surviving cells; building full
    # time/frequency grids for every spectrogram cell is far more memory
    # than the result needs.
    freq_idx, frame_idx = np.unravel_index(keep, magnitude.shape)
    flat_mag = all_mag[keep]
    flat_t = times[frame_idx]
    flat_f = freqs[freq_idx]

    # Normalize each dimension to [0, 1]
    def norm(arr):
        lo, hi = arr.min(), arr.max()
        # The small epsilon avoids division by zero when all values match.
        return (arr - lo) / (hi - lo + 1e-10)

    t_norm = norm(flat_t)
    f_norm = norm(flat_f)
    a_norm = norm(flat_mag)

    return {
        # Raw values (for display / audio sync)
        't': flat_t.round(4).tolist(),
        'f': flat_f.round(2).tolist(),
        'a': flat_mag.round(6).tolist(),
        # Normalized values [0,1] used for 3D coordinates
        'tn': t_norm.round(6).tolist(),
        'fn': f_norm.round(6).tolist(),
        'an': a_norm.round(6).tolist(),
        # Metadata
        'duration': round(float(times[-1]), 3),
        'sampleRate': int(sr),
        'pointCount': len(flat_t),
    }
|
|
|
|
|
|
if __name__ == '__main__':
    # NOTE: MP3 support requires ffmpeg to be installed and on PATH.
    # WAV files work out of the box.
    # NOTE(review): debug=True enables Flask's debug mode; confirm this entry
    # point is only used for local development.
    app.run(port=5000, debug=True)
|