// jaeswift-website/js/voice-mode.js
//
// 203 lines
// 9 KiB
// JavaScript

// JAE-AI Voice Mode — Web Speech API (Phase 4)
// Adds mic button + auto-speak of assistant replies.
(function () {
'use strict';
// Speech-recognition constructor: standard name first, then the webkit-prefixed one.
const SR = window.SpeechRecognition || window.webkitSpeechRecognition;
// Speech-synthesis controller used below for getVoices(), cancel() and speak().
const SY = window.speechSynthesis;
// Both recognition and synthesis must be present; otherwise the whole module
// bails out here and nothing below (mic button, auto-speak, window.__jaeVoice) is set up.
if (!SR || !SY) { console.warn('[voice] Web Speech API unsupported in this browser'); return; }
/**
 * Invoke `fn` once the document has finished parsing.
 *
 * If the document is still in the 'loading' state the callback is registered
 * for DOMContentLoaded; in every other state it is called synchronously.
 *
 * @param {Function} fn - Callback to run when the DOM is available.
 */
function ready(fn) {
  const stillParsing = document.readyState === 'loading';
  if (stillParsing) {
    document.addEventListener('DOMContentLoaded', fn);
    return;
  }
  fn();
}
// localStorage keys holding the user's voice preferences.
// '1' means assistant replies are spoken automatically (see the 'jae-agent-reply' listener).
const LS_AUTO = 'jae-voice-auto-speak';
// Name of the preferred synthesis voice (matched against voice.name).
const LS_VOICE = 'jae-voice-name';
// Speech rate, stored as a string and parsed with parseFloat; '1.05' is used when unset.
const LS_RATE = 'jae-voice-rate';
// Language tag for recognition and voice fallback; 'en-GB' is used when unset.
// NOTE(review): nothing in this file writes this key — presumably set elsewhere or by hand.
const LS_LANG = 'jae-voice-lang';
// Current SpeechRecognition instance (created anew by each startListening call).
let recognition = null;
// True while a recognition session is considered active; drives the mic toggle.
let recognizing = false;
// Handle of the pending "1.5 s of silence" auto-submit timer, or null.
let silenceTimer = null;
// Finalised transcript accumulated during the current recognition session.
let finalBuffer = '';
// Cached result of SY.getVoices(); filled by loadVoices().
let voices = [];
// Number of timer-based retries still allowed. Bounded so that a platform
// which never exposes any voice does not keep a 250 ms timer re-arming for the
// whole lifetime of the page (40 x 250 ms = 10 s of polling at most).
let voicePollsLeft = 40;

/**
 * Refresh the cached `voices` list from the speech synthesiser.
 *
 * Some browsers populate the voice list asynchronously, so an empty result is
 * retried every 250 ms until voices appear or the retry budget is exhausted.
 * Calling this function again later (for example from a voiceschanged handler)
 * still refreshes the cache even after the budget is spent.
 */
function loadVoices() {
  voices = SY.getVoices();
  if (voices.length || voicePollsLeft <= 0) return;
  voicePollsLeft -= 1;
  setTimeout(loadVoices, 250);
}
// Where the synthesiser exposes onvoiceschanged, refresh the cache whenever the
// browser reports a change to the voice list.
if (typeof SY.onvoiceschanged !== 'undefined') SY.onvoiceschanged = loadVoices;
// Initial load; loadVoices re-schedules itself while the list is still empty.
loadVoices();
/**
 * Reduce a Markdown/HTML reply to plain text suitable for speech synthesis.
 *
 * Fenced code blocks are replaced by the words "code block"; inline code,
 * bold, italic and link syntax are unwrapped to their text; HTML tags and
 * comments are dropped; heading/quote/underscore/tilde markers become spaces;
 * whitespace is collapsed.
 *
 * @param {*} txt - Text to clean. Falsy values yield ''; other non-strings are
 *   converted with String().
 * @returns {string} The cleaned, trimmed text.
 */
function stripMarkdownForSpeech(txt) {
  if (!txt) return '';
  return String(txt)
    .replace(/```[\s\S]*?```/g, ' code block ')
    .replace(/`([^`]+)`/g, '$1')
    // HTML must be stripped BEFORE the marker pass below: that pass replaces
    // every '>' with a space, after which no tag could ever be matched.
    // Only tag-shaped sequences are removed so that "a < b" survives.
    .replace(/<!--[\s\S]*?-->|<\/?[a-z][^>]*>/gi, ' ')
    .replace(/\*\*([^*]+)\*\*/g, '$1')
    .replace(/\*([^*]+)\*/g, '$1')
    .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
    .replace(/[#>_~]/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}
/**
 * Pick the synthesis voice to use for the next utterance.
 *
 * Order of preference:
 *   1. the voice whose name equals the stored LS_VOICE preference;
 *   2. a voice whose language equals the stored LS_LANG (default 'en-GB');
 *   3. any voice whose language starts with 'en';
 *   4. the first available voice.
 *
 * Language tags are compared case-insensitively with '_' treated as '-', so a
 * voice reported as 'en_GB' still counts as an exact match for 'en-GB', and a
 * voice without a `lang` is skipped instead of throwing.
 *
 * @returns {?SpeechSynthesisVoice} The chosen voice, or null when none exist.
 */
function chooseVoice() {
  // Query the synthesiser directly when the cache has not been filled yet,
  // the same fallback openSettingsModal uses.
  const pool = voices.length ? voices : SY.getVoices();
  const normalise = (tag) => String(tag || '').replace(/_/g, '-').toLowerCase();

  const preferName = localStorage.getItem(LS_VOICE);
  if (preferName) {
    const named = pool.find((v) => v.name === preferName);
    if (named) return named;
  }

  const wanted = normalise(localStorage.getItem(LS_LANG) || 'en-GB');
  return (
    pool.find((v) => normalise(v.lang) === wanted) ||
    pool.find((v) => normalise(v.lang).startsWith('en')) ||
    pool[0] ||
    null
  );
}
/**
 * Speak `text` aloud, replacing anything currently being spoken.
 *
 * The text is cleaned with stripMarkdownForSpeech and truncated to 600
 * characters. While the utterance plays, <body> carries the 'jae-speaking'
 * class.
 *
 * @param {*} text - Text to speak; falsy values are ignored.
 */
function speak(text) {
  if (!text) return;
  try {
    SY.cancel();
  } catch (e) {
    // Best effort: failing to cancel must not prevent the new utterance.
    console.warn('[voice] cancel failed', e);
  }
  const clean = stripMarkdownForSpeech(String(text)).slice(0, 600);
  if (!clean) return;
  const utter = new SpeechSynthesisUtterance(clean);
  const voice = chooseVoice();
  if (voice) { utter.voice = voice; utter.lang = voice.lang; }
  // A corrupted stored value would parse to NaN, which is not a valid rate.
  // Fall back to the default and keep the value inside the 0.1–10 range the
  // Web Speech API defines for `rate`.
  const storedRate = Number.parseFloat(localStorage.getItem(LS_RATE));
  utter.rate = Number.isFinite(storedRate) ? Math.min(10, Math.max(0.1, storedRate)) : 1.05;
  utter.pitch = 1.0;
  const clearSpeaking = () => document.body.classList.remove('jae-speaking');
  utter.onstart = () => document.body.classList.add('jae-speaking');
  utter.onend = clearSpeaking;
  utter.onerror = clearSpeaking;
  SY.speak(utter);
}
/**
 * Locate the chat text field.
 *
 * @returns {?Element} The element with id 'chatInput' if present, otherwise
 *   the first textarea/input inside a '.chat-input' container, or null.
 */
function findInput() {
  const byId = document.getElementById('chatInput');
  if (byId) {
    return byId;
  }
  return document.querySelector('.chat-input textarea, .chat-input input');
}
/**
 * Locate the chat "send" button.
 *
 * @returns {?Element} The element with id 'chatSendBtn' if present, otherwise
 *   the first match for '.chat-send-btn' or 'button[data-chat-send]', or null.
 */
function findSendBtn() {
  const byId = document.getElementById('chatSendBtn');
  if (byId) {
    return byId;
  }
  return document.querySelector('.chat-send-btn, button[data-chat-send]');
}
/**
 * Submit the chat form by clicking the send button, but only when the chat
 * input exists, holds non-whitespace text, and a send button was found.
 */
function submitChat() {
  const field = findInput();
  const button = findSendBtn();
  if (!field) return;
  const typed = (field.value || '').trim();
  if (!typed) return;
  if (!button) return;
  button.click();
}
/**
 * Reflect the listening state in the page.
 *
 * Toggles the 'listening' class on the mic button (when it exists) and the
 * 'jae-listening' class on <body>.
 *
 * @param {boolean} on - Whether recognition is currently active.
 */
function setListeningUI(on) {
  const micButton = document.getElementById('chatMicBtn');
  if (micButton) {
    micButton.classList.toggle('listening', on);
  }
  const bodyClasses = document.body.classList;
  bodyClasses.toggle('jae-listening', on);
}
/**
 * Start a speech-recognition session and stream the transcript into the chat
 * input. After 1.5 s without new results the session is stopped and the chat
 * is submitted automatically.
 *
 * Does nothing when a session is already active or being started.
 */
function startListening() {
  if (recognizing) return;
  // A timer left over from a previous session must not stop/submit this one.
  if (silenceTimer) { clearTimeout(silenceTimer); silenceTimer = null; }
  try {
    const session = new SR();
    recognition = session;
    // Events from a session that has since been replaced (for example the late
    // `end` of a session the user stopped just before restarting) must not
    // touch the shared state of the newer session.
    const isCurrent = () => recognition === session;

    session.lang = localStorage.getItem(LS_LANG) || 'en-GB';
    session.continuous = true;
    session.interimResults = true;
    finalBuffer = '';

    session.onstart = () => {
      if (!isCurrent()) return;
      recognizing = true;
      setListeningUI(true);
    };
    session.onresult = (ev) => {
      if (!isCurrent()) return;
      let interim = '';
      for (let i = ev.resultIndex; i < ev.results.length; i++) {
        const r = ev.results[i];
        if (r.isFinal) finalBuffer += r[0].transcript + ' ';
        else interim += r[0].transcript;
      }
      const input = findInput();
      if (input) input.value = (finalBuffer + interim).trim();
      // auto-submit after 1.5s of silence (no new results)
      if (silenceTimer) clearTimeout(silenceTimer);
      silenceTimer = setTimeout(() => {
        if (!isCurrent()) return;
        stopListening();
        // Short delay before submitting, as in the original flow.
        setTimeout(submitChat, 120);
      }, 1500);
    };
    session.onerror = (e) => {
      console.warn('[voice] err', e.error);
      if (isCurrent()) stopListening();
    };
    session.onend = () => {
      if (!isCurrent()) return;
      recognizing = false;
      setListeningUI(false);
    };

    // Mark the session active before start(): the `start` event is
    // asynchronous, and without this a quick second click would launch a
    // second recogniser instead of toggling the first one off.
    recognizing = true;
    session.start();
  } catch (e) {
    console.warn('[voice] start failed', e);
    recognizing = false;
    setListeningUI(false);
  }
}
/**
 * Stop the current recognition session.
 *
 * Cancels the pending silence timer, asks the recogniser to stop (errors from
 * stop() are deliberately ignored), and resets the listening flag and UI.
 */
function stopListening() {
  const pendingTimer = silenceTimer;
  if (pendingTimer) {
    clearTimeout(pendingTimer);
    silenceTimer = null;
  }
  const active = recognition;
  if (active) {
    try {
      active.stop();
    } catch (e) {}
  }
  recognizing = false;
  setListeningUI(false);
}
/**
 * Insert the microphone and voice-settings buttons just before the chat send
 * button.
 *
 * @returns {boolean} true when the buttons are in place (newly inserted or
 *   already present), false when the send button cannot be found yet.
 */
function injectMicButton() {
  // Already injected counts as success, so a caller that keeps retrying until
  // this returns true (the MutationObserver in the bootstrap) can stop.
  if (document.getElementById('chatMicBtn')) return true;
  const send = findSendBtn();
  if (!send || !send.parentNode) return false;

  // Build one toolbar button and place it immediately before the send button.
  const addButton = (id, className, title, glyph, onClick) => {
    const button = document.createElement('button');
    button.id = id;
    button.className = className;
    button.type = 'button';
    button.title = title;
    // The visible content is a bare glyph; give assistive tech a real name.
    button.setAttribute('aria-label', title);
    button.textContent = glyph;
    button.addEventListener('click', onClick);
    send.parentNode.insertBefore(button, send);
    return button;
  };

  addButton(
    'chatMicBtn',
    'chat-mic-btn',
    'Voice mode (click to talk, auto-submits after 1.5s silence)',
    '🎙',
    () => { if (recognizing) stopListening(); else startListening(); }
  );
  addButton(
    'chatVoiceSettingsBtn',
    'chat-voice-settings-btn',
    'Voice settings',
    '⚙',
    openSettingsModal
  );
  return true;
}
/**
 * Open the voice-settings dialog (auto-speak toggle, voice picker, rate
 * slider). Does nothing if the dialog is already open.
 *
 * Only voices whose language starts with 'en' are offered. Settings are
 * written to localStorage when "Save" is pressed; "Test voice" previews the
 * current selection without keeping it.
 */
function openSettingsModal() {
  if (document.getElementById('jaeVoiceModal')) return;
  const vs = (voices.length ? voices : SY.getVoices()).filter(v => (v.lang || '').toLowerCase().startsWith('en'));
  const curName = localStorage.getItem(LS_VOICE) || '';
  // Guard against a corrupted stored rate, which would otherwise render "NaN".
  const storedRate = parseFloat(localStorage.getItem(LS_RATE) || '1.05');
  const curRate = Number.isFinite(storedRate) ? storedRate : 1.05;
  const autoSpeak = localStorage.getItem(LS_AUTO) === '1';

  const overlay = document.createElement('div');
  overlay.id = 'jaeVoiceModal';
  overlay.className = 'jae-voice-modal';
  // Only static markup and locally computed numbers/flags go through innerHTML.
  overlay.innerHTML = `
<div class="jvm-panel">
<div class="jvm-head">VOICE SETTINGS<button class="jvm-close" type="button">✕</button></div>
<label class="jvm-row"><input type="checkbox" id="jvm-auto" ${autoSpeak ? 'checked' : ''}> Auto-speak agent replies</label>
<label class="jvm-row">Voice<select id="jvm-voice"></select></label>
<label class="jvm-row">Rate <span id="jvm-rate-val">${curRate.toFixed(2)}</span><input type="range" id="jvm-rate" min="0.8" max="1.3" step="0.05" value="${curRate}"></label>
<div class="jvm-row jvm-actions">
<button type="button" id="jvm-test">▶ Test voice</button>
<button type="button" id="jvm-save">Save</button>
</div>
</div>`;

  // Voice names come from the platform and are not under our control, so the
  // options are built through DOM properties rather than string-interpolated
  // HTML: a name containing quotes or angle brackets cannot break the markup.
  const voiceEl = overlay.querySelector('#jvm-voice');
  for (const v of vs) {
    const option = document.createElement('option');
    option.value = v.name;
    option.textContent = `${v.name} (${v.lang})`;
    option.selected = v.name === curName;
    voiceEl.appendChild(option);
  }

  document.body.appendChild(overlay);
  const close = () => overlay.remove();
  overlay.querySelector('.jvm-close').onclick = close;
  // A click on the backdrop itself (not on the panel) dismisses the dialog.
  overlay.addEventListener('click', (e) => { if (e.target === overlay) close(); });

  const rateEl = overlay.querySelector('#jvm-rate');
  const rateVal = overlay.querySelector('#jvm-rate-val');
  rateEl.oninput = () => { rateVal.textContent = parseFloat(rateEl.value).toFixed(2); };

  overlay.querySelector('#jvm-test').onclick = () => {
    // speak() reads the voice and rate from localStorage synchronously, so the
    // preview applies the selection temporarily and then restores the previous
    // values; closing the dialog without "Save" leaves the settings untouched.
    const previous = [LS_VOICE, LS_RATE].map((key) => [key, localStorage.getItem(key)]);
    localStorage.setItem(LS_VOICE, voiceEl.value);
    localStorage.setItem(LS_RATE, rateEl.value);
    try {
      speak('Voice check. JAE-AI online. All systems nominal.');
    } finally {
      for (const [key, value] of previous) {
        if (value === null) localStorage.removeItem(key);
        else localStorage.setItem(key, value);
      }
    }
  };
  overlay.querySelector('#jvm-save').onclick = () => {
    localStorage.setItem(LS_AUTO, overlay.querySelector('#jvm-auto').checked ? '1' : '0');
    localStorage.setItem(LS_VOICE, voiceEl.value);
    localStorage.setItem(LS_RATE, rateEl.value);
    close();
  };
}
// Bootstrap once the DOM is ready: place the voice buttons and hook auto-speak.
ready(function () {
  // The chat UI may be rendered later; if the buttons cannot be placed yet,
  // watch the document for changes and retry until injection succeeds.
  const injectedNow = injectMicButton();
  if (!injectedNow) {
    const observer = new MutationObserver(function () {
      if (injectMicButton()) {
        observer.disconnect();
      }
    });
    observer.observe(document.body, { childList: true, subtree: true });
  }

  // Speak assistant replies announced through the 'jae-agent-reply' event,
  // but only while the auto-speak preference is stored as '1'.
  document.addEventListener('jae-agent-reply', function (event) {
    if (localStorage.getItem(LS_AUTO) !== '1') {
      return;
    }
    speak(event.detail && event.detail.text);
  });
});
// Expose a small handle on window so other scripts (or the console) can call
// speak, start/stop listening, and open the settings dialog.
window.__jaeVoice = { speak, start: startListening, stop: stopListening, openSettings: openSettingsModal };
})();