This commit is contained in:
F04C
2026-05-03 12:05:13 +08:00
commit 1352c62ec1
4 changed files with 794 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
*.md
+110
View File
@@ -0,0 +1,110 @@
from flask import Flask, request, jsonify, render_template
import librosa
import numpy as np
import tempfile
import os
# Flask application object; the route handlers in this module register on it.
app = Flask(__name__)
# Upload size limit for incoming requests: 100 MB.
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024 # 100 MB limit
# File extensions accepted by /analyze (compared lower-cased, including the dot).
ALLOWED_EXTENSIONS = {'.mp3', '.wav', '.ogg', '.flac', '.m4a'}
@app.route('/')
def index():
    """Render and return the ``index.html`` template for the root URL."""
    page = render_template('index.html')
    return page
@app.route('/analyze', methods=['POST'])
def analyze():
    """Analyze an uploaded audio file and return the result as JSON.

    Expects a multipart form upload under the field name ``file`` whose
    extension is one of ``ALLOWED_EXTENSIONS``.

    Returns:
        The JSON-encoded dict produced by ``analyze_audio`` on success, or a
        ``({'error': ...}, status)`` pair: 400 for a missing/empty/unsupported
        upload, 500 when saving or analysis raises.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    file = request.files['file']
    if not file or file.filename == '':
        return jsonify({'error': 'No file selected'}), 400

    ext = os.path.splitext(file.filename)[1].lower()
    if ext not in ALLOWED_EXTENSIONS:
        # Sort so the message is stable; set iteration order is arbitrary.
        allowed = ", ".join(sorted(ALLOWED_EXTENSIONS))
        return jsonify({'error': f'Unsupported file type. Allowed: {allowed}'}), 400

    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp:
            # Record the path before writing so the finally-clause can remove
            # the file even when the save itself fails.
            tmp_path = tmp.name
            # Write through the already-open handle; reopening the file by
            # name while it is still open is not permitted on Windows.
            file.save(tmp)
        # The handle is closed (and flushed) here, so the analyzer sees the
        # complete file.
        result = analyze_audio(tmp_path)
        return jsonify(result)
    except Exception as e:
        # Top-level boundary: keep the JSON error contract for the client but
        # leave a traceback in the server log.
        app.logger.exception('Audio analysis failed')
        return jsonify({'error': f'Analysis failed: {str(e)}'}), 500
    finally:
        if tmp_path:
            try:
                os.unlink(tmp_path)
            except FileNotFoundError:
                pass
def analyze_audio(file_path, max_points=20000):
    """Turn an audio file into a sparse point cloud of its spectrogram.

    The file is loaded as mono at 22050 Hz, an STFT magnitude spectrogram is
    computed, and only the loudest time/frequency bins are kept: first the
    top 25% by magnitude, then at most ``max_points`` of those.

    Args:
        file_path: Path of the audio file to load.
        max_points: Upper bound on the number of points returned (>= 1).

    Returns:
        A JSON-serializable dict with parallel lists, one entry per point:
        ``t`` (seconds), ``f`` (Hz), ``a`` (STFT magnitude), and their
        [0, 1] counterparts ``tn``, ``fn``, ``an``; plus ``duration``
        (seconds), ``sampleRate`` and ``pointCount``. ``tn`` is the point's
        time divided by the clip duration, so it lines up with a playback
        position expressed as ``currentTime / duration``; ``fn`` and ``an``
        are min-max normalized over the returned points.

    Raises:
        ValueError: If ``max_points`` is not positive or the file decodes to
            zero samples.
    """
    if max_points < 1:
        raise ValueError('max_points must be a positive integer')

    # Load audio as mono at 22050 Hz
    y, sr = librosa.load(file_path, mono=True, sr=22050)
    if y.size == 0:
        raise ValueError('Audio file contains no samples')
    duration = y.size / sr

    n_fft = 2048
    hop_length = 512

    # Short-Time Fourier Transform
    S = librosa.stft(y, n_fft=n_fft, hop_length=hop_length)
    magnitude = np.abs(S)  # shape: (n_freq_bins, n_frames)
    times = librosa.frames_to_time(
        np.arange(magnitude.shape[1]), sr=sr, hop_length=hop_length
    )
    freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    # Work with flat indices into the spectrogram rather than materializing
    # full-size time/frequency grids; the coordinates of the few points that
    # survive are looked up at the end.
    flat_mag = magnitude.ravel()

    # Remove noise floor — keep top 25% loudest points
    threshold = np.percentile(flat_mag, 75)
    keep = np.flatnonzero(flat_mag >= threshold)

    # If still too many, keep top N by magnitude
    if keep.size > max_points:
        top = np.argpartition(flat_mag[keep], -max_points)[-max_points:]
        keep = keep[top]

    freq_idx, time_idx = np.unravel_index(keep, magnitude.shape)
    flat_mag = flat_mag[keep]
    flat_t = times[time_idx]
    flat_f = freqs[freq_idx]

    def unit_range(arr):
        """Min-max scale *arr* to [0, 1]; the epsilon avoids 0/0 when flat."""
        lo, hi = arr.min(), arr.max()
        return (arr - lo) / (hi - lo + 1e-10)

    # Time is scaled by the full clip duration (not the kept points' range)
    # so a point's position matches the playback position at that instant.
    t_norm = flat_t / duration
    f_norm = unit_range(flat_f)
    a_norm = unit_range(flat_mag)

    return {
        # Raw values (for display / audio sync)
        't': flat_t.round(4).tolist(),
        'f': flat_f.round(2).tolist(),
        'a': flat_mag.round(6).tolist(),
        # Normalized values [0,1] used for 3D coordinates
        'tn': t_norm.round(6).tolist(),
        'fn': f_norm.round(6).tolist(),
        'an': a_norm.round(6).tolist(),
        # Metadata
        'duration': round(float(duration), 3),
        'sampleRate': int(sr),
        'pointCount': len(flat_t),
    }
if __name__ == '__main__':
    # Local development entry point.
    # NOTE: MP3 support requires ffmpeg to be installed and on PATH;
    # WAV files work out of the box.
    # NOTE(review): debug=True is meant for local use only — confirm this
    # entry point is never used to serve the app publicly.
    app.run(port=5000, debug=True)
+3
View File
@@ -0,0 +1,3 @@
flask
librosa
numpy
+680
View File
@@ -0,0 +1,680 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Bird Audio Visualizer</title>
<style>
*,
*::before,
*::after {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
background: #08080f;
color: #fff;
font-family: "Segoe UI", system-ui, sans-serif;
overflow: hidden;
user-select: none;
}
#canvas-container {
position: fixed;
inset: 0;
}
/* ── Title ─────────────────────────────────────────────────── */
#title {
position: fixed;
top: 22px;
left: 50%;
transform: translateX(-50%);
font-size: 18px;
font-weight: 300;
letter-spacing: 6px;
color: rgba(255, 255, 255, 0.75);
text-transform: uppercase;
pointer-events: none;
}
/* ── Info panel (top-right) ────────────────────────────────── */
#info-panel {
position: fixed;
top: 18px;
right: 20px;
background: rgba(10, 10, 20, 0.75);
border: 1px solid rgba(255, 255, 255, 0.08);
border-radius: 10px;
padding: 14px 18px;
font-size: 13px;
color: rgba(255, 255, 255, 0.55);
min-width: 200px;
backdrop-filter: blur(12px);
display: none;
}
#info-panel h3 {
font-size: 13px;
font-weight: 600;
color: rgba(255, 255, 255, 0.85);
margin-bottom: 10px;
}
#info-panel .row {
display: flex;
justify-content: space-between;
margin-bottom: 4px;
}
#info-panel .val {
color: rgba(255, 255, 255, 0.9);
font-variant-numeric: tabular-nums;
}
.grad-bar {
margin-top: 12px;
border-top: 1px solid rgba(255, 255, 255, 0.07);
padding-top: 10px;
}
.grad-label {
font-size: 11px;
color: rgba(255, 255, 255, 0.35);
margin-bottom: 5px;
}
.grad-strip {
height: 8px;
border-radius: 4px;
background: linear-gradient(
to right,
#0000ff,
#00ffff,
#00ff00,
#ffff00,
#ff0000
);
}
.grad-ends {
display: flex;
justify-content: space-between;
font-size: 11px;
color: rgba(255, 255, 255, 0.35);
margin-top: 3px;
}
.axes-info {
margin-top: 10px;
border-top: 1px solid rgba(255, 255, 255, 0.07);
padding-top: 10px;
font-size: 11px;
color: rgba(255, 255, 255, 0.4);
line-height: 1.7;
}
/* ── Controls bar (bottom) ─────────────────────────────────── */
#controls {
position: fixed;
bottom: 24px;
left: 50%;
transform: translateX(-50%);
display: flex;
align-items: center;
gap: 12px;
background: rgba(8, 8, 18, 0.82);
border: 1px solid rgba(255, 255, 255, 0.08);
border-radius: 14px;
padding: 12px 20px;
backdrop-filter: blur(16px);
z-index: 100;
flex-wrap: wrap;
max-width: 90vw;
}
.btn {
background: rgba(80, 120, 255, 0.18);
border: 1px solid rgba(80, 120, 255, 0.4);
color: #ccd6ff;
padding: 7px 15px;
border-radius: 8px;
cursor: pointer;
font-size: 13px;
transition:
background 0.18s,
border-color 0.18s;
white-space: nowrap;
}
.btn:hover {
background: rgba(80, 120, 255, 0.35);
border-color: rgba(80, 120, 255, 0.7);
}
.btn:disabled {
opacity: 0.4;
cursor: not-allowed;
}
#file-input {
display: none;
}
#status-text {
font-size: 13px;
color: rgba(255, 255, 255, 0.45);
white-space: nowrap;
max-width: 240px;
overflow: hidden;
text-overflow: ellipsis;
}
#loading-dot {
width: 8px;
height: 8px;
border-radius: 50%;
background: #6699ff;
display: none;
}
#loading-dot.active {
display: block;
animation: blink 0.8s infinite alternate;
}
@keyframes blink {
from {
opacity: 0.2;
}
to {
opacity: 1;
}
}
audio {
height: 34px;
border-radius: 8px;
outline: none;
display: none;
max-width: 240px;
}
audio::-webkit-media-controls-panel {
background: rgba(20, 25, 50, 0.9);
}
/* ── Prompt overlay (before any file is chosen) ────────────── */
#prompt {
position: fixed;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
text-align: center;
pointer-events: none;
transition: opacity 0.5s;
}
#prompt .big {
font-size: 52px;
opacity: 0.12;
}
#prompt .sub {
font-size: 14px;
color: rgba(255, 255, 255, 0.2);
margin-top: 10px;
letter-spacing: 2px;
}
#prompt.hidden {
opacity: 0;
}
/* ── Progress bar ──────────────────────────────────────── */
#progress-bar-wrap {
position: fixed;
top: 0;
left: 0;
right: 0;
height: 3px;
background: rgba(255, 255, 255, 0.06);
z-index: 200;
opacity: 0;
transition: opacity 0.2s;
}
#progress-bar-wrap.active {
opacity: 1;
}
#progress-bar {
height: 100%;
width: 0%;
background: linear-gradient(to right, #4488ff, #88ccff);
border-radius: 0 2px 2px 0;
box-shadow: 0 0 8px #4488ff;
transition: width 0.4s ease;
}
/* ── Progress overlay (center) ─────────────────────────── */
#progress-overlay {
position: fixed;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
background: rgba(8, 8, 18, 0.88);
border: 1px solid rgba(255, 255, 255, 0.09);
border-radius: 14px;
padding: 28px 36px;
text-align: center;
backdrop-filter: blur(18px);
display: none;
z-index: 300;
min-width: 260px;
}
#progress-overlay.active {
display: block;
}
#progress-label {
font-size: 13px;
color: rgba(255, 255, 255, 0.5);
margin-bottom: 14px;
letter-spacing: 1px;
}
#progress-track {
width: 100%;
height: 6px;
background: rgba(255, 255, 255, 0.08);
border-radius: 3px;
overflow: hidden;
}
#progress-fill {
height: 100%;
width: 0%;
background: linear-gradient(to right, #4488ff, #88ccff);
border-radius: 3px;
box-shadow: 0 0 6px #4488ff;
transition: width 0.35s ease;
}
#progress-pct {
font-size: 12px;
color: rgba(255, 255, 255, 0.3);
margin-top: 8px;
}
</style>
<!-- Three.js r155 via importmap -->
<script type="importmap">
{
"imports": {
"three": "https://cdn.jsdelivr.net/npm/three@0.155.0/build/three.module.min.js",
"three/addons/": "https://cdn.jsdelivr.net/npm/three@0.155.0/examples/jsm/"
}
}
</script>
</head>
<body>
<div id="canvas-container"></div>
<div id="progress-bar-wrap"><div id="progress-bar"></div></div>
<div id="progress-overlay">
<div id="progress-label">ANALYZING AUDIO</div>
<div id="progress-track"><div id="progress-fill"></div></div>
<div id="progress-pct">0%</div>
</div>
<div id="title">Bird Audio Visualizer</div>
<div id="prompt">
<div class="big">🐦</div>
<div class="sub">Upload an audio file to begin</div>
</div>
<div id="info-panel">
<h3>Audio Info</h3>
<div id="info-rows"></div>
<div class="grad-bar">
<div class="grad-label">FREQUENCY → COLOR</div>
<div class="grad-strip"></div>
<div class="grad-ends"><span>Low</span><span>High</span></div>
</div>
<div class="axes-info">
X axis → Time<br />
Y axis → Frequency<br />
Z axis → Amplitude
</div>
</div>
<div id="controls">
<input type="file" id="file-input" accept=".mp3,.wav,.ogg,.flac,.m4a" />
<button class="btn" id="pick-btn">📁 Choose File</button>
<button class="btn" id="analyze-btn" disabled>🔍 Analyze</button>
<audio id="audio-player" controls></audio>
<div id="loading-dot"></div>
<span id="status-text">No file selected</span>
</div>
<script type="module">
import * as THREE from "three";
import { OrbitControls } from "three/addons/controls/OrbitControls.js";
// ── Scene setup ──────────────────────────────────────────────────────────
const container = document.getElementById("canvas-container");

// Scene: dark background with exponential fog of the same colour.
const scene = new THREE.Scene();
scene.background = new THREE.Color(0x08080f);
scene.fog = new THREE.FogExp2(0x08080f, 0.004);

// Perspective camera sized to the window.
const camera = new THREE.PerspectiveCamera(
  60,
  innerWidth / innerHeight,
  0.1,
  2000,
);
camera.position.set(0, 30, 120);

// Renderer fills the window and draws into #canvas-container.
const renderer = new THREE.WebGLRenderer({ antialias: true });
renderer.setPixelRatio(devicePixelRatio);
renderer.setSize(innerWidth, innerHeight);
container.appendChild(renderer.domElement);

// Damped orbit controls aimed slightly above the grid.
const orbitControls = new OrbitControls(camera, renderer.domElement);
orbitControls.enableDamping = true;
orbitControls.dampingFactor = 0.06;
orbitControls.target.set(0, 20, 0);
orbitControls.update();

// Lights: ambient base, a main directional light and a dimmer fill
// light from the opposite side.
const ambient = new THREE.AmbientLight(0x334466, 3);
const sun = new THREE.DirectionalLight(0xffffff, 2);
sun.position.set(60, 80, 50);
const fill = new THREE.DirectionalLight(0x4466aa, 0.8);
fill.position.set(-60, -20, -50);
scene.add(ambient);
scene.add(sun);
scene.add(fill);

// Ground grid
const grid = new THREE.GridHelper(140, 28, 0x1a1a33, 0x101028);
scene.add(grid);
// Axis arrows
/**
 * Add an axis arrow to the scene.
 *
 * @param {number[]} dir     Direction as [x, y, z]; normalized before use.
 * @param {number[]} origin  Arrow start point as [x, y, z].
 * @param {number}   length  Arrow length; the head is sized relative to it
 *                           (8% long, 4% wide).
 * @param {number}   hex     Arrow colour as a hex number.
 */
function addArrow(dir, origin, length, hex) {
  const arrow = new THREE.ArrowHelper(
    new THREE.Vector3(...dir).normalize(),
    new THREE.Vector3(...origin),
    length,
    hex,
    length * 0.08,
    length * 0.04,
  );
  scene.add(arrow);
}

// The three axes share one origin at the left edge of the plot.
addArrow([1, 0, 0], [-52, 0, 0], 12, 0xff5555); // X = time
addArrow([0, 1, 0], [-52, 0, 0], 12, 0x55ff88); // Y = freq
addArrow([0, 0, -1], [-52, 0, 0], 12, 0x5599ff); // Z = amp
// Time cursor
// Time cursor: a thin, semi-transparent vertical bar that stays hidden
// until a visualization has been built.
const cursorGeo = new THREE.BoxGeometry(0.35, 65, 0.35);
const cursorMat = new THREE.MeshBasicMaterial({
  opacity: 0.25,
  transparent: true,
  color: 0xffffff,
});
const timeCursor = new THREE.Mesh(cursorGeo, cursorMat);
timeCursor.visible = false;
timeCursor.position.set(-50, 27, 0);
scene.add(timeCursor);

// Current visualization state, replaced on every analysis.
let audioMesh = null; // instanced mesh holding one sphere per point
let audioData = null; // time-sorted dataset used for the real-time reveal
// ── Build instanced visualization ────────────────────────────────────────
// All instances are pre-loaded but initially count=0 so nothing is drawn.
// The animation loop reveals them as the audio plays.
/**
 * Replace the current point cloud with one built from an /analyze response.
 *
 * Every point becomes one instance of a small sphere. The instances are
 * uploaded up front with `count = 0`, so nothing is drawn until the
 * animation loop raises the count as playback advances.
 *
 * @param {object} data  Analysis result with parallel arrays `t`, `f`, `a`,
 *                       `tn`, `fn`, `an` plus `duration`, `sampleRate` and
 *                       `pointCount`. The object itself is not modified.
 */
function buildVisualization(data) {
  // Drop the previous mesh and free its GPU resources.
  if (audioMesh) {
    scene.remove(audioMesh);
    audioMesh.geometry.dispose();
    audioMesh.material.dispose();
    audioMesh = null;
  }

  // Sort all arrays by raw time so count-based reveal is correct
  const order = Array.from({ length: data.t.length }, (_, i) => i).sort(
    (a, b) => data.t[a] - data.t[b],
  );
  const sorted = {
    duration: data.duration,
    sampleRate: data.sampleRate,
    pointCount: data.pointCount,
  };
  for (const key of ["t", "f", "a", "tn", "fn", "an"]) {
    sorted[key] = order.map((i) => data[key][i]);
  }
  audioData = sorted;

  const n = sorted.tn.length;
  const geo = new THREE.SphereGeometry(0.22, 5, 4);
  // Per-instance colours come from setColorAt() and do not need
  // `vertexColors`. That flag makes the shader also read a per-vertex
  // `color` attribute, which this geometry does not have.
  const mat = new THREE.MeshPhongMaterial({ shininess: 60 });
  audioMesh = new THREE.InstancedMesh(geo, mat, n);
  audioMesh.instanceMatrix.setUsage(THREE.DynamicDrawUsage);
  audioMesh.count = 0; // start with nothing visible

  const matrix = new THREE.Matrix4();
  const color = new THREE.Color();
  for (let i = 0; i < n; i++) {
    const tn = sorted.tn[i];
    const fn = sorted.fn[i];
    const an = sorted.an[i];

    // Map normalized values into the plot volume:
    // x in [-50, 50], y in [0, 60], z in [-15, 15].
    matrix.makeTranslation(tn * 100 - 50, fn * 60, an * 30 - 15);
    audioMesh.setMatrixAt(i, matrix);

    // Hue runs from blue (low frequency) to red (high frequency);
    // louder points are lighter.
    const hue = (1 - fn) * 0.667;
    const lightness = 0.3 + an * 0.4;
    color.setHSL(hue, 1.0, lightness);
    audioMesh.setColorAt(i, color);
  }
  audioMesh.instanceMatrix.needsUpdate = true;
  // instanceColor is only allocated by the first setColorAt() call, so it
  // is still null when there are no points.
  if (audioMesh.instanceColor) audioMesh.instanceColor.needsUpdate = true;
  scene.add(audioMesh);

  // Show time cursor
  timeCursor.visible = true;
  timeCursor.position.x = -50;

  // Update info panel (all interpolated values are numbers)
  document.getElementById("info-panel").style.display = "block";
  document.getElementById("info-rows").innerHTML = `
<div class="row"><span>Points</span><span class="val">${n.toLocaleString()}</span></div>
<div class="row"><span>Duration</span><span class="val">${sorted.duration.toFixed(2)} s</span></div>
<div class="row"><span>Sample rate</span><span class="val">${sorted.sampleRate.toLocaleString()} Hz</span></div>
`;

  // Camera reset
  camera.position.set(0, 30, 120);
  orbitControls.target.set(0, 20, 0);
  orbitControls.update();
}
// ── Auto-orbit state ─────────────────────────────────────────────────────
// Camera orbits slowly when idle; pauses while user is interacting.
let autoOrbit = true;
let orbitResumeTimer = null;
const ORBIT_SPEED = 0.003; // radians per frame
const RESUME_DELAY = 3000; // ms after user releases controls

// Stop the automatic orbit and cancel any pending resume.
const pauseAutoOrbit = () => {
  autoOrbit = false;
  clearTimeout(orbitResumeTimer);
};

// (Re)start the countdown after which the automatic orbit resumes.
const scheduleOrbitResume = () => {
  clearTimeout(orbitResumeTimer);
  orbitResumeTimer = setTimeout(() => {
    autoOrbit = true;
  }, RESUME_DELAY);
};

// Dragging pauses the orbit until RESUME_DELAY after release; a wheel
// event pauses it and restarts the countdown immediately.
renderer.domElement.addEventListener("pointerdown", () => pauseAutoOrbit());
renderer.domElement.addEventListener("pointerup", () => scheduleOrbitResume());
renderer.domElement.addEventListener("wheel", () => {
  pauseAutoOrbit();
  scheduleOrbitResume();
});
// ── Animation loop ───────────────────────────────────────────────────────
const audioEl = document.getElementById("audio-player");

/**
 * Number of entries in the ascending array `times` that are <= `t`
 * (binary search for the upper bound).
 */
function countUpTo(times, t) {
  let low = 0;
  let high = times.length;
  while (low < high) {
    const mid = (low + high) >>> 1;
    if (times[mid] <= t) {
      low = mid + 1;
    } else {
      high = mid;
    }
  }
  return low;
}

/**
 * Per-frame update: auto-orbit the camera, reveal the points up to the
 * current playback time, move the time cursor, and render.
 */
function animate() {
  requestAnimationFrame(animate);

  // Auto-orbit: rotate the camera by ORBIT_SPEED around the vertical axis
  // through the controls' target.
  if (autoOrbit) {
    const pivot = orbitControls.target;
    const offX = camera.position.x - pivot.x;
    const offZ = camera.position.z - pivot.z;
    const c = Math.cos(ORBIT_SPEED);
    const s = Math.sin(ORBIT_SPEED);
    camera.position.x = pivot.x + offX * c - offZ * s;
    camera.position.z = pivot.z + offX * s + offZ * c;
    camera.lookAt(pivot);
  }
  orbitControls.update();

  // `duration` is NaN until media metadata is available, which makes this
  // comparison false.
  const hasDuration = audioEl.duration > 0;

  // Real-time reveal: draw only the instances whose time has been reached.
  if (hasDuration && audioMesh && audioData) {
    const now = audioEl.currentTime;
    const atStart = audioEl.paused && now === 0;
    audioMesh.count = atStart ? 0 : countUpTo(audioData.t, now);
  }

  // Sync time cursor with the playback position.
  if (hasDuration && timeCursor.visible) {
    const progress = audioEl.currentTime / audioEl.duration;
    timeCursor.position.x = progress * 100 - 50;
  }

  renderer.render(scene, camera);
}
animate();
// ── Resize ───────────────────────────────────────────────────────────────
// Keep the camera projection and the canvas in step with the window size.
function onWindowResize() {
  const width = innerWidth;
  const height = innerHeight;
  camera.aspect = width / height;
  camera.updateProjectionMatrix();
  renderer.setSize(width, height);
}
window.addEventListener("resize", onWindowResize);
// ── UI ───────────────────────────────────────────────────────────────────
// Shorthand for element lookup by id.
const byId = (id) => document.getElementById(id);

// Controls bar and prompt overlay
const fileInput = byId("file-input");
const analyzeBtn = byId("analyze-btn");
const pickBtn = byId("pick-btn");
const statusText = byId("status-text");
const loadingDot = byId("loading-dot");
const prompt = byId("prompt");

// Progress indicators: thin top bar plus the centred overlay
const progressWrap = byId("progress-bar-wrap");
const progressBar = byId("progress-bar");
const progressOverlay = byId("progress-overlay");
const progressFill = byId("progress-fill");
const progressPct = byId("progress-pct");

// Handle of the interval that drives the simulated progress.
let _progressTimer = null;
/**
 * Show both progress indicators and start a simulated progress run.
 *
 * The server reports no real progress, so the value creeps from 0 towards
 * 85% and waits there until finishProgress() is called.
 */
function startProgress() {
  // A run that is still ticking would otherwise be orphaned (and keep
  // writing to the bar) once its handle is overwritten below.
  clearInterval(_progressTimer);
  progressWrap.classList.add("active");
  progressOverlay.classList.add("active");
  setProgress(0);
  // Simulate: 0→85% in roughly 12 s (about 62 ticks of 200 ms), slowing
  // near the top
  let pct = 0;
  _progressTimer = setInterval(() => {
    const step = (85 - pct) * 0.04 + 0.3;
    pct = Math.min(pct + step, 85);
    setProgress(pct);
  }, 200);
}
/**
 * Set both progress bars to `pct` percent and show the rounded value
 * as text.
 */
function setProgress(pct) {
  const width = pct + "%";
  for (const bar of [progressBar, progressFill]) {
    bar.style.width = width;
  }
  progressPct.textContent = Math.round(pct) + "%";
}
/**
 * Stop the simulated progress, jump to 100%, then hide and reset both
 * indicators half a second later.
 */
function finishProgress() {
  clearInterval(_progressTimer);
  setProgress(100);

  const hideAndReset = () => {
    progressWrap.classList.remove("active");
    progressOverlay.classList.remove("active");
    progressBar.style.width = "0%";
    progressFill.style.width = "0%";
  };
  setTimeout(hideAndReset, 500);
}
// File most recently chosen by the user; sent to the server on Analyze.
let selectedFile = null;

// The real <input type="file"> is hidden; the button opens its dialog.
pickBtn.addEventListener("click", () => {
  fileInput.click();
});

fileInput.addEventListener("change", (event) => {
  const chosen = event.target.files[0];
  if (!chosen) return; // nothing was selected

  selectedFile = chosen;
  statusText.textContent = chosen.name;
  analyzeBtn.disabled = false;
  prompt.classList.add("hidden");

  // Release the previous object URL, if any, before pointing the player
  // at the new file.
  const previousSrc = audioEl.src;
  if (previousSrc) URL.revokeObjectURL(previousSrc);
  audioEl.src = URL.createObjectURL(chosen);
  audioEl.style.display = "block";
});
// Upload the selected file to /analyze and render the returned points.
analyzeBtn.addEventListener("click", async () => {
  if (!selectedFile) return;

  // Lock the controls while the request is in flight.
  analyzeBtn.disabled = true;
  pickBtn.disabled = true;
  loadingDot.classList.add("active");
  statusText.textContent = "Analyzing…";
  startProgress();

  const formData = new FormData();
  formData.append("file", selectedFile);

  try {
    const resp = await fetch("/analyze", {
      method: "POST",
      body: formData,
    });

    // Error responses are not guaranteed to be JSON (an oversized upload,
    // for instance, may be rejected before the handler runs), so only
    // parse the body when the server says it is JSON.
    const contentType = resp.headers.get("content-type") || "";
    const data = contentType.includes("application/json")
      ? await resp.json()
      : null;

    if (!resp.ok || !data || data.error) {
      const reason =
        (data && data.error) ||
        `Server responded with ${resp.status} ${resp.statusText}`.trim();
      alert("Error: " + reason);
      statusText.textContent = "Analysis failed.";
    } else {
      buildVisualization(data);
      statusText.textContent = `${data.pointCount.toLocaleString()} points rendered`;
    }
  } catch (err) {
    // Reached when the request could not complete or a JSON body was
    // malformed.
    alert("Network error: " + err.message);
    statusText.textContent = "Error.";
  } finally {
    finishProgress();
    analyzeBtn.disabled = false;
    pickBtn.disabled = false;
    loadingDot.classList.remove("active");
  }
});
</script>
</body>
</html>