From 1352c62ec14eba129298ef947afd5cf039c18ec9 Mon Sep 17 00:00:00 2001
From: F04C
Date: Sun, 3 May 2026 12:05:13 +0800
Subject: [PATCH] init

---
 .gitignore           |   1 +
 app.py               | 142 +++++++++
 requirements.txt     |   3 +
 templates/index.html | 680 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 826 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 app.py
 create mode 100644 requirements.txt
 create mode 100644 templates/index.html

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2e1fa2d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.md
\ No newline at end of file
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..a3998f8
--- /dev/null
+++ b/app.py
@@ -0,0 +1,142 @@
+"""Flask service that turns an uploaded audio file into spectrogram points."""
+
+import os
+import tempfile
+
+import librosa
+import numpy as np
+from flask import Flask, jsonify, render_template, request
+
+app = Flask(__name__)
+app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100 MB limit
+
+ALLOWED_EXTENSIONS = {'.mp3', '.wav', '.ogg', '.flac', '.m4a'}
+
+
+@app.route('/')
+def index():
+    """Serve the single-page visualizer UI."""
+    return render_template('index.html')
+
+
+@app.route('/analyze', methods=['POST'])
+def analyze():
+    """Analyze the uploaded ``file`` form field and return the points as JSON.
+
+    Responds with 400 when the upload is missing or has an unsupported
+    extension, and with 500 when decoding or analysis fails.
+    """
+    if 'file' not in request.files:
+        return jsonify({'error': 'No file provided'}), 400
+
+    file = request.files['file']
+    # A missing (None) or empty filename both mean no file was chosen.
+    if not file or not file.filename:
+        return jsonify({'error': 'No file selected'}), 400
+
+    ext = os.path.splitext(file.filename)[1].lower()
+    if ext not in ALLOWED_EXTENSIONS:
+        # Sort so the message does not depend on set iteration order.
+        allowed = ', '.join(sorted(ALLOWED_EXTENSIONS))
+        return jsonify(
+            {'error': f'Unsupported file type. Allowed: {allowed}'}
+        ), 400
+
+    tmp_path = None
+    try:
+        # Record the path before writing so the ``finally`` clause removes
+        # the file even when saving the upload fails part-way through.
+        with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp:
+            tmp_path = tmp.name
+        file.save(tmp_path)
+
+        result = analyze_audio(tmp_path)
+        return jsonify(result)
+    except Exception as e:
+        app.logger.exception('Audio analysis failed')
+        return jsonify({'error': f'Analysis failed: {str(e)}'}), 500
+    finally:
+        if tmp_path and os.path.exists(tmp_path):
+            os.unlink(tmp_path)
+
+
+def analyze_audio(file_path, max_points=20000):
+    """Reduce an audio file to its loudest time/frequency/magnitude points.
+
+    Args:
+        file_path: Path of an audio file that librosa can decode.
+        max_points: Maximum number of points to return (positive integer).
+
+    Returns:
+        A JSON-serializable dict holding the raw point coordinates
+        (``t``, ``f``, ``a``), the same values scaled to [0, 1]
+        (``tn``, ``fn``, ``an``), and ``duration``, ``sampleRate`` and
+        ``pointCount`` metadata.
+
+    Raises:
+        ValueError: If the file decodes to zero samples.
+    """
+    # Load audio as mono at 22050 Hz
+    y, sr = librosa.load(file_path, mono=True, sr=22050)
+    if y.size == 0:
+        raise ValueError('Audio file contains no samples')
+
+    n_fft = 2048
+    hop_length = 512
+
+    # Short-Time Fourier Transform
+    S = librosa.stft(y, n_fft=n_fft, hop_length=hop_length)
+    magnitude = np.abs(S)  # shape: (n_freq_bins, n_frames)
+
+    times = librosa.frames_to_time(
+        np.arange(magnitude.shape[1]), sr=sr, hop_length=hop_length
+    )
+    freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
+
+    # Remove noise floor — keep top 25% loudest points. Index the surviving
+    # bins directly instead of building full-size time/frequency grids.
+    threshold = np.percentile(magnitude, 75)
+    freq_idx, time_idx = np.nonzero(magnitude >= threshold)
+    flat_mag = magnitude[freq_idx, time_idx]
+    flat_t = times[time_idx]
+    flat_f = freqs[freq_idx]
+
+    # If still too many, keep top N by magnitude
+    if len(flat_mag) > max_points:
+        top_idx = np.argpartition(flat_mag, -max_points)[-max_points:]
+        flat_mag = flat_mag[top_idx]
+        flat_t = flat_t[top_idx]
+        flat_f = flat_f[top_idx]
+
+    # Normalize each dimension to [0, 1]
+    def norm(arr):
+        lo, hi = arr.min(), arr.max()
+        # The epsilon keeps a constant array from dividing by zero.
+        return (arr - lo) / (hi - lo + 1e-10)
+
+    t_norm = norm(flat_t)
+    f_norm = norm(flat_f)
+    a_norm = norm(flat_mag)
+
+    return {
+        # Raw values (for display / audio sync)
+        't': flat_t.round(4).tolist(),
+        'f': flat_f.round(2).tolist(),
+        'a': flat_mag.round(6).tolist(),
+        # Normalized values [0,1] used for 3D coordinates
+        'tn': t_norm.round(6).tolist(),
+        'fn': f_norm.round(6).tolist(),
+        'an': a_norm.round(6).tolist(),
+        # Metadata
+        'duration': round(float(times[-1]), 3),
+        'sampleRate': int(sr),
+        'pointCount': len(flat_t),
+    }
+
+
+if __name__ == '__main__':
+    # NOTE: MP3 support requires ffmpeg to be installed and on PATH.
+    # WAV files work out of the box.
+    # NOTE: debug=True enables the interactive Werkzeug debugger; keep
+    # this entry point for local development only.
+    app.run(debug=True, port=5000)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..4c183f5
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+flask
+librosa
+numpy
diff --git a/templates/index.html b/templates/index.html
new file mode 100644
index 0000000..bf05cc0
--- /dev/null
+++ b/templates/index.html
@@ -0,0 +1,680 @@
+ + + + + + Bird Audio Visualizer + + + + + + +
+ +
+ +
+
ANALYZING AUDIO
+
+
0%
+
+ +
Bird Audio Visualizer
+ +
+
🐦
+
Upload an audio file to begin
+
+ +
+

Audio Info

+
+
+
FREQUENCY → COLOR
+
+
LowHigh
+
+
+ X axis → Time
+ Y axis → Frequency
+ Z axis → Amplitude +
+
+ +
+ + + + +
+ No file selected +
+ + + +