Listening...
Listening...
<script setup lang="ts">
import {
SpeechInput,
SpeechInputCancelButton,
SpeechInputPreview,
SpeechInputRecordButton,
} from '@/components/elevenlabs-ui/speech-input'
import { Input } from '@/components/ui/input'
import { Textarea } from '@/components/ui/textarea'
import { ref } from 'vue'
import { toast } from 'vue-sonner'
/**
 * Fetches a short-lived ElevenLabs Scribe token from the local API route.
 * Throws when the request fails or the route reports an error; failures are
 * logged to the console before being rethrown to the caller.
 */
async function getToken() {
  try {
    const response = await fetch('/api/get-scribe-token', { method: 'POST' })
    if (!response.ok) {
      throw new Error('Failed to get token')
    }
    const payload = await response.json()
    if (payload.error) {
      throw new Error(payload.error)
    }
    return payload.token
  }
  catch (error) {
    // Log for visibility, then propagate so the component's error path runs.
    console.error(error)
    throw error
  }
}
/**
 * Wires a SpeechInput to a writable string binding: snapshots the field's
 * text when recording starts, live-appends the transcript to that snapshot
 * while recording, and restores the snapshot on cancel.
 *
 * Replaces three hand-duplicated handler groups (Right / Left / Input) with
 * one factory; the destructured aliases below keep every template-visible
 * handler name unchanged.
 */
function bindTranscript(target: { value: string }) {
  // Text present before recording started; transcripts are appended to this
  // base so existing content is never overwritten. Plain local (not a ref):
  // the template never reads it, so no reactivity is needed.
  let base = ''
  return {
    onStart: () => {
      base = target.value
    },
    onChange: ({ transcript }: { transcript: string }) => {
      target.value = base + transcript
    },
    onStop: ({ transcript }: { transcript: string }) => {
      target.value = base + transcript
    },
    onCancel: () => {
      // Discard the in-progress transcript; restore the pre-recording text.
      target.value = base
    },
  }
}
// Shared by all three demos: surface connection errors as a toast.
function onError(error: Error | Event) {
  toast.error(String(error))
}
// --- TextareaWithSpeechInputRight ---
const rightValue = ref('')
const {
  onStart: onStartRight,
  onChange: onChangeRight,
  onStop: onStopRight,
  onCancel: onCancelRight,
} = bindTranscript(rightValue)
// --- TextareaWithSpeechInputLeft ---
const leftValue = ref('')
const {
  onStart: onStartLeft,
  onChange: onChangeLeft,
  onStop: onStopLeft,
  onCancel: onCancelLeft,
} = bindTranscript(leftValue)
// --- InputWithSpeechInput ---
const inputValue = ref('')
const {
  onStart: onStartInput,
  onChange: onChangeInput,
  onStop: onStopInput,
  onCancel: onCancelInput,
} = bindTranscript(inputValue)
</script>
<template>
<div class="absolute inset-0 space-y-4 overflow-auto rounded-2xl p-10">
<!-- TextareaWithSpeechInputRight: controls docked bottom-right.
     Child order (cancel, preview, record) puts the record button on the
     outer edge; the component adjusts its layout from this order. -->
<div class="relative">
<Textarea
v-model="rightValue"
placeholder="Jot down some thoughts..."
class="min-h-[120px] resize-none rounded-2xl px-3.5 pt-3 pb-14"
/>
<div class="absolute right-3 bottom-3 flex items-center gap-2">
<SpeechInput
size="sm"
:get-token="getToken"
@start="onStartRight"
@change="onChangeRight"
@stop="onStopRight"
@cancel="onCancelRight"
@error="onError"
>
<SpeechInputCancelButton />
<SpeechInputPreview placeholder="Listening..." />
<SpeechInputRecordButton />
</SpeechInput>
</div>
</div>
<!-- TextareaWithSpeechInputLeft: same textarea, controls docked bottom-left
     with the child order reversed (record, preview, cancel). -->
<div class="relative">
<Textarea
v-model="leftValue"
placeholder="Jot down some thoughts..."
class="min-h-[120px] resize-none rounded-2xl px-3.5 pt-3 pb-14"
/>
<div class="absolute bottom-3 left-3 flex items-center gap-2">
<SpeechInput
size="sm"
:get-token="getToken"
@start="onStartLeft"
@change="onChangeLeft"
@stop="onStopLeft"
@cancel="onCancelLeft"
@error="onError"
>
<SpeechInputRecordButton />
<SpeechInputPreview placeholder="Listening..." />
<SpeechInputCancelButton />
</SpeechInput>
</div>
</div>
<!-- InputWithSpeechInput: single-line input with the speech control inline
     beside it (no preview child — minimal cancel + record variant). -->
<div class="flex items-center gap-2.5">
<Input
v-model="inputValue"
placeholder="Give this idea a title..."
class="min-w-0 flex-1 px-3.5 text-base transition-[flex-basis] duration-200 md:text-sm"
/>
<SpeechInput
class="shrink-0"
:get-token="getToken"
@start="onStartInput"
@change="onChangeInput"
@stop="onStopInput"
@cancel="onCancelInput"
@error="onError"
>
<SpeechInputCancelButton />
<SpeechInputRecordButton />
</SpeechInput>
</div>
</div>
</template>

Installation
pnpm dlx elevenlabs-ui-vue@latest add speech-input
Usage
import {
SpeechInput,
SpeechInputCancelButton,
SpeechInputPreview,
SpeechInputRecordButton,
} from "@/components/elevenlabs-ui/speech-input"

Basic Usage
<script setup lang="ts">
async function getToken() {
const response = await fetch("/api/get-scribe-token", { method: "POST" })
const json = await response.json()
return json.token
}
function handleChange(data: any) {
console.log(data.transcript)
}
function handleStop(data: any) {
console.log("Final:", data.transcript)
}
</script>
<template>
<SpeechInput
:getToken="getToken"
@change="handleChange"
@stop="handleStop"
>
<SpeechInputRecordButton />
<SpeechInputPreview placeholder="Start speaking..." />
<SpeechInputCancelButton />
</SpeechInput>
</template>

With Form Input
<script setup lang="ts">
import { ref } from "vue"
const value = ref("")
function handleStop(data: any) {
value.value = value.value + " " + data.transcript
}
</script>
<template>
<div class="flex items-center gap-2">
<input
v-model="value"
class="flex-1 rounded border px-3 py-2"
/>
<SpeechInput
:getToken="getToken"
@stop="handleStop"
>
<SpeechInputRecordButton />
<SpeechInputPreview />
<SpeechInputCancelButton />
</SpeechInput>
</div>
</template>

Reversed Layout
The component automatically adjusts its layout based on child order:
<template>
<SpeechInput :getToken="getToken">
<SpeechInputCancelButton />
<SpeechInputPreview />
<SpeechInputRecordButton />
</SpeechInput>
</template>

Minimal (Record Button Only)
<template>
<SpeechInput
:getToken="getToken"
@stop="handleStop"
>
<SpeechInputRecordButton />
</SpeechInput>
</template>

Custom Placeholder
<template>
<SpeechInput :getToken="getToken">
<SpeechInputRecordButton />
<SpeechInputPreview placeholder="Say something..." />
<SpeechInputCancelButton />
</SpeechInput>
</template>

Using the Composable
Access the speech input context in child components:
<script setup lang="ts">
import { useSpeechInput } from "@/components/elevenlabs-ui/speech-input"
const { transcript, isConnected, isConnecting } = useSpeechInput()
</script>
<template>
<div>
<p>
Status:
<span v-if="isConnecting">Connecting</span>
<span v-else-if="isConnected">Recording</span>
<span v-else>Idle</span>
</p>
<p>Transcript: {{ transcript }}</p>
</div>
</template>

Server Route for Token
Create a server action to securely fetch the Scribe token:
export default defineEventHandler(async () => {
const response = await fetch(
"https://api.elevenlabs.io/v1/speech-to-text/get-realtime-token",
{
method: "POST",
headers: {
"Content-Type": "application/json",
"xi-api-key": process.env.ELEVENLABS_API_KEY as string,
},
body: JSON.stringify({
model_id: "scribe_v2_realtime",
ttl_secs: 300,
}),
}
)
const data = await response.json()
return { token: data.token }
})

API Reference
SpeechInput
The root component that manages speech-to-text state and provides context to child components.
Props
| Prop | Type | Default | Description |
|---|---|---|---|
| getToken | () => Promise<string> | - | Function to fetch ElevenLabs Scribe token |
| modelId | string | "scribe_v2_realtime" | ElevenLabs model ID |
| baseUri | string | - | Custom WebSocket base URI |
| commitStrategy | CommitStrategy | "vad" | How transcripts are committed ("manual" or "vad") |
| vadSilenceThresholdSecs | number | - | VAD silence threshold (0.3-3.0) |
| vadThreshold | number | - | VAD threshold (0.1-0.9) |
| minSpeechDurationMs | number | - | Minimum speech duration (50-2000ms) |
| minSilenceDurationMs | number | - | Minimum silence duration (50-2000ms) |
| languageCode | string | - | ISO-639-1/3 language code |
| microphone | MicrophoneOptions | See below | Microphone configuration |
| audioFormat | AudioFormat | - | Audio format for manual streaming |
| sampleRate | number | - | Sample rate for manual streaming |
| class | string | - | Optional CSS classes |
Emits
| Event | Type | Description |
|---|---|---|
| change | (data: SpeechInputData) => void | Called when transcript changes |
| start | (data: SpeechInputData) => void | Called when recording starts |
| stop | (data: SpeechInputData) => void | Called when recording stops |
| cancel | (data: SpeechInputData) => void | Called when recording is cancelled |
| error | (error: Error \| Event) => void | Called on connection errors |
| authError | (data: { error: string }) => void | Called on authentication errors |
| quotaExceededError | (data: { error: string }) => void | Called when quota is exceeded |
Default Microphone Options
{
echoCancellation: true,
noiseSuppression: true
}

SpeechInputRecordButton
Toggle button that switches between microphone icon (idle), connecting indicator, and stop icon (recording).
Props
| Prop | Type | Description |
|---|---|---|
| class | string | Optional CSS classes |
| disabled | boolean | Disable the button |
| ...props | InstanceType<typeof Button> | All button props |
Emits
| Event | Type | Description |
|---|---|---|
| click | (e: MouseEvent) => void | Additional click handler |
SpeechInputPreview
Displays the current transcript with smooth text animations.
Props
| Prop | Type | Default | Description |
|---|---|---|---|
| placeholder | string | "Listening..." | Text shown when empty |
| class | string | - | Optional CSS classes |
| ...props | HTMLDivElement | - | All div props |
SpeechInputCancelButton
Button to cancel the current recording and clear the transcript.
Props
| Prop | Type | Description |
|---|---|---|
| class | string | Optional CSS classes |
| ...props | HTMLButtonElement | All button props |
Emits
| Event | Type | Description |
|---|---|---|
| click | (e: MouseEvent) => void | Additional click handler |
useSpeechInput
Composable to access speech input context from child components.
Returns
| Property | Type | Description |
|---|---|---|
| isConnected | boolean | Whether currently connected/recording |
| isConnecting | boolean | Whether connection is in progress |
| transcript | string | Full transcript (committed + partial) |
| partialTranscript | string | Current partial transcript |
| committedTranscripts | string[] | Array of committed transcripts |
| error | string \| null | Current error message |
| start | () => Promise<void> | Start recording |
| stop | () => void | Stop recording |
| cancel | () => void | Cancel and clear transcript |
SpeechInputData
Data object passed to callbacks.
interface SpeechInputData {
partialTranscript: string
committedTranscripts: string[]
transcript: string // Combined full transcript
}

CommitStrategy
enum CommitStrategy {
MANUAL = "manual",
VAD = "vad",
}

AudioFormat
enum AudioFormat {
PCM_8000 = "pcm_8000",
PCM_16000 = "pcm_16000",
PCM_22050 = "pcm_22050",
PCM_24000 = "pcm_24000",
PCM_44100 = "pcm_44100",
PCM_48000 = "pcm_48000",
ULAW_8000 = "ulaw_8000",
}

Features
- Real-time Transcription: Live speech-to-text using ElevenLabs Scribe
- Compound Components: Flexible composition with record button, preview, and cancel
- Animated Transitions: Smooth expand/collapse animations using motion-v
- Voice Activity Detection: Automatic transcript commits based on speech pauses
- Visual Feedback: Distinct states for idle, connecting, and recording
- Accessibility: Proper ARIA labels and keyboard interaction
Notes
- Requires an ElevenLabs API key for generating Scribe tokens
- Token generation should happen server-side to protect your API key
- The component automatically handles microphone permissions
- Uses WebSocket for real-time communication with ElevenLabs Scribe API
- VAD (Voice Activity Detection) mode automatically commits transcripts during pauses
- The preview component uses a gradient mask for text overflow
- Layout automatically adjusts based on whether the record button is first or last
On This Page
InstallationUsageBasic UsageWith Form InputReversed LayoutMinimal (Record Button Only)Custom PlaceholderUsing the ComposableServer Route for TokenAPI ReferenceSpeechInputSpeechInputRecordButtonSpeechInputPreviewSpeechInputCancelButtonuseSpeechInputSpeechInputDataCommitStrategyAudioFormatFeaturesNotes