Relative Content

Tag Archive for pytorch, huggingface-transformers, openai-api, openai-whisper

OpenAI Whisper: single-word transcription for an audio file with ~1.5M frames

from datasets import load_dataset
from huggingface_hub import login
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
import torch
import torchaudio
from torchaudio.utils import download_asset

processor = AutoProcessor.from_pretrained("openai/whisper-large-v3")
model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large-v3")

sample_wav = download_asset(audio_file)
print(torchaudio.info(sample_wav))

AudioMetaData(sample_rate=8000, num_frames=1564224, num_channels=1, bits_per_sample=8, encoding=PCM_U)

# Resample to match Whisper sampling rate
target_sample_rate = processor.feature_extractor.sampling_rate
print(f'Whisper sampling rate: {target_sample_rate}')
if orig_sample_rate != target_sample_rate:
    transform […]