"""Flask app that converts an uploaded audio file into spectrogram point data."""

import os
import tempfile

import librosa
import numpy as np
from flask import Flask, request, jsonify, render_template

app = Flask(__name__)
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100 MB limit

ALLOWED_EXTENSIONS = {'.mp3', '.wav', '.ogg', '.flac', '.m4a'}


@app.route('/')
def index():
    """Render the ``index.html`` template."""
    return render_template('index.html')


@app.route('/analyze', methods=['POST'])
def analyze():
    """Analyze the audio file uploaded in the ``file`` form field.

    Returns:
        A JSON response with the dictionary produced by ``analyze_audio`` on
        success; a JSON ``{'error': ...}`` body with status 400 when the
        upload is missing or has an unsupported extension, or status 500
        when saving or analysis raises.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    upload = request.files['file']
    if not upload or upload.filename == '':
        return jsonify({'error': 'No file selected'}), 400

    ext = os.path.splitext(upload.filename)[1].lower()
    if ext not in ALLOWED_EXTENSIONS:
        # Sort so the message is stable; set iteration order is arbitrary.
        allowed = ', '.join(sorted(ALLOWED_EXTENSIONS))
        return jsonify({'error': f'Unsupported file type. Allowed: {allowed}'}), 400

    tmp_path = None
    try:
        # Record the path before anything else can fail so the ``finally``
        # clause always removes the file, and close the descriptor before
        # saving so the path is not held open while it is written by name.
        fd, tmp_path = tempfile.mkstemp(suffix=ext)
        os.close(fd)
        upload.save(tmp_path)

        result = analyze_audio(tmp_path)
        return jsonify(result)
    except Exception as e:
        # Top-level request boundary: keep the traceback in the server log
        # and report the failure to the client as JSON.
        app.logger.exception('Audio analysis failed')
        return jsonify({'error': f'Analysis failed: {str(e)}'}), 500
    finally:
        if tmp_path and os.path.exists(tmp_path):
            os.unlink(tmp_path)


def _normalize(arr):
    """Scale *arr* linearly so its minimum maps to 0 and its maximum to ~1."""
    lo, hi = arr.min(), arr.max()
    # The epsilon avoids division by zero when every value is identical.
    return (arr - lo) / (hi - lo + 1e-10)


def analyze_audio(file_path, max_points=20000):
    """Extract the loudest time/frequency points of an audio file.

    The file is loaded as mono at 22050 Hz, transformed with an STFT
    (``n_fft=2048``, ``hop_length=512``), and reduced to the bins whose
    magnitude is at or above the 75th percentile, capped at *max_points*.

    Args:
        file_path: Path of the audio file to load.
        max_points: Maximum number of points to return; must be at least 1.

    Returns:
        A dict with raw values (``t`` seconds, ``f`` Hz, ``a`` magnitude),
        their normalized counterparts (``tn``, ``fn``, ``an``), and the
        metadata keys ``duration`` (time of the last STFT frame),
        ``sampleRate`` and ``pointCount``.

    Raises:
        ValueError: If *max_points* is less than 1 or the audio has no samples.
    """
    if max_points < 1:
        # A zero cap would otherwise turn ``[-max_points:]`` into ``[0:]``
        # and silently keep every point.
        raise ValueError('max_points must be a positive integer')

    # Load audio as mono at 22050 Hz
    y, sr = librosa.load(file_path, mono=True, sr=22050)
    if y.size == 0:
        raise ValueError('Audio file contains no samples')

    n_fft = 2048
    hop_length = 512

    # Short-Time Fourier Transform
    spectrum = librosa.stft(y, n_fft=n_fft, hop_length=hop_length)
    magnitude = np.abs(spectrum)  # shape: (n_freq_bins, n_frames)

    times = librosa.frames_to_time(
        np.arange(magnitude.shape[1]), sr=sr, hop_length=hop_length
    )
    freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    # Remove noise floor — keep top 25% loudest points.
    # Indexing with the (row, column) positions of the surviving bins yields
    # the same points, in the same row-major order, as flattening a full
    # frequency/time meshgrid, without allocating those two dense grids.
    threshold = np.percentile(magnitude, 75)
    freq_idx, time_idx = np.nonzero(magnitude >= threshold)
    flat_mag = magnitude[freq_idx, time_idx]
    flat_t = times[time_idx]
    flat_f = freqs[freq_idx]

    # If still too many, keep top N by magnitude
    if len(flat_mag) > max_points:
        top_idx = np.argpartition(flat_mag, -max_points)[-max_points:]
        flat_mag = flat_mag[top_idx]
        flat_t = flat_t[top_idx]
        flat_f = flat_f[top_idx]

    # Normalize each dimension to [0, 1]
    t_norm = _normalize(flat_t)
    f_norm = _normalize(flat_f)
    a_norm = _normalize(flat_mag)

    return {
        # Raw values (for display / audio sync)
        't': flat_t.round(4).tolist(),
        'f': flat_f.round(2).tolist(),
        'a': flat_mag.round(6).tolist(),
        # Normalized values [0,1] used for 3D coordinates
        'tn': t_norm.round(6).tolist(),
        'fn': f_norm.round(6).tolist(),
        'an': a_norm.round(6).tolist(),
        # Metadata
        'duration': round(float(times[-1]), 3),
        'sampleRate': int(sr),
        'pointCount': len(flat_t),
    }


if __name__ == '__main__':
    # NOTE: MP3 support requires ffmpeg to be installed and on PATH.
    # WAV files work out of the box.
    # NOTE(review): debug=True enables the interactive Werkzeug debugger,
    # which allows arbitrary code execution; use only for local development.
    app.run(debug=True, port=5000)