Package Insights
((week_daily_avg - month_daily_avg) / month_daily_avg) * 100
Weekly Downloads
GitHub Stars
Downloads by OS
Python Versions
Top Countries
Dependencies
- aiohttp ==3.10.11
- apipeline >=0.2.12
- nest-asyncio
- numpy >=1.22.0
- pillow
- protobuf
- pydantic
- pydub
- pyloudnorm
- python-dotenv
- requests
- scipy
446 optional dependencies
- a2a-sdk[a2a]
- accelerate[accelerate]
- accelerate[tts-step]
- accelerate[musetalk-avatar]
- accelerate[llm-transformers-manual-speech-higgs]
- accelerate[tts-f5]
- achatbot[langchain-openai-tidb-vector]
- achatbot[lite-avatar-gpu]
- achatbot[litellm-processor]
- achatbot[livekit-room-audio-stream]
- achatbot[livekit-transport]
- achatbot[llm-processor]
- achatbot[llm-transformers-manual-speech-llama]
- achatbot[llm-transformers-manual-speech-llasa]
- achatbot[llm-transformers-manual-speech-spark]
- achatbot[llm-transformers-manual-vision]
- achatbot[llm-transformers-manual-vision-deepseek-ocr]
- achatbot[llm-transformers-manual-vision-deepseekvl2]
- achatbot[llm-transformers-manual-vision-fastvlm]
- achatbot[llm-transformers-manual-vision-gemma]
- achatbot[llm-transformers-manual-vision-glm4v]
- achatbot[llm-transformers-manual-vision-img-janus]
- achatbot[llm-transformers-manual-vision-keye]
- achatbot[llm-transformers-manual-vision-kimi]
- achatbot[llm-transformers-manual-vision-llama]
- achatbot[llm-transformers-manual-vision-mimo]
- achatbot[llm-transformers-manual-vision-molmo]
- achatbot[llm-transformers-manual-vision-qwen]
- achatbot[llm-transformers-manual-vision-skyworkr1v]
- achatbot[llm-transformers-manual-vision-smolvlm]
- achatbot[llm-transformers-manual-vision-speech-gemma]
- achatbot[llm-transformers-manual-vision-speech-phi]
- achatbot[llm-transformers-manual-vision-voice-minicpmo]
- achatbot[llm-transformers-manual-vision-voice-qwen]
- achatbot[llm-transformers-manual-voice-freeze-omni]
- achatbot[llm-transformers-manual-voice-glm]
- achatbot[llm-transformers-manual-voice-kimi]
- achatbot[llm-transformers-manual-voice-vita]
- achatbot[local-terminal-chat-bot]
- achatbot[musetalk-avatar]
- achatbot[openai-llm-processor]
- achatbot[queue]
- achatbot[remote-grpc-tts-client]
- achatbot[remote-grpc-tts-server]
- achatbot[remote-queue-chat-bot-be-worker]
- achatbot[remote-queue-chat-bot-fe]
- achatbot[remote-rpc-chat-bot-be-worker]
- achatbot[remote-rpc-chat-bot-fe]
- achatbot[silero-vad]
- achatbot[silero-vad-analyzer]
- achatbot[speech-asr]
- achatbot[speech-tts]
- achatbot[speech-vad]
- achatbot[speech-vad-analyzer]
- achatbot[speech-waker]
- achatbot[step-voice-processor]
- achatbot[tts-cosy-voice]
- achatbot[tts-cosy-voice2]
- achatbot[tts-f5]
- achatbot[tts-fishspeech]
- achatbot[tts-generator-spark]
- achatbot[tts-llasa]
- achatbot[tts-mega3]
- achatbot[tts-openvoicev2]
- achatbot[tts-orpheus]
- achatbot[tts-processor]
- achatbot[tts-spark]
- achatbot[tts-step]
- achatbot[tts-zonos]
- achatbot[tts-zonos-hybrid]
- achatbot[vad-recorder]
- achatbot[vision-transformers-got-ocr]
- achatbot[webrtc-silero-vad]
- achatbot[websocket-server-transport]
- achatbot[tts-higgs]
- achatbot[agora]
- achatbot[agora-channel-audio-stream]
- achatbot[agora-transport]
- achatbot[ai-frameworks-processor]
- achatbot[asr-processor]
- achatbot[codec-wavtokenizer]
- achatbot[core-llm]
- achatbot[daily-langchain-rag-bot]
- achatbot[daily-room-audio-stream]
- achatbot[daily-rtvi-bot]
- achatbot[daily-transport]
- achatbot[daily-webrtc-terminal-chat-bot]
- achatbot[daily-webrtc-vad-analyzer]
- achatbot[diffusers]
- achatbot[freeze-omni-voice-processor]
- achatbot[glm-voice-processor]
- achatbot[google-llm-processor]
- addict[lam-audio2expression-avatar]
- addict[llm-transformers-manual-vision-deepseek-ocr]
- agora-python-server-sdk-v1[agora]
- agora-realtime-ai-api-v1[agora]
- aiortc[webrtc]
- attrdict[llm-transformers-manual-vision-img-janus]
- attrdict[tts-mega3]
- attrdict[llm-transformers-manual-vision-deepseekvl2]
- autoawq[autoawq]
- av[llm-transformers-manual-vision]
- av[musetalk-avatar]
- av[lite-avatar]
- backoff[llm-transformers-manual-vision-speech-phi]
- bitsandbytes[bitsandbytes]
- bitsandbytes[tts-f5]
- bitsandbytes[llm-transformers-manual-vision-fastvlm]
- blobfile[llm-transformers-manual-voice-kimi]
- blobfile[llm-transformers-manual-vision-kimi]
- cached-path[tts-f5]
- causal-conv1d[tts-zonos-hybrid]
- click[tts-f5]
- cn2an[tts-openvoicev2]
- colorlog[agora]
- conformer[tts-cosy-voice]
- conformer[llm-transformers-manual-voice-kimi]
- conformer[llm-transformers-manual-voice-vita]
- conformer[tts-step]
- ctranslate2[ctranslate2]
- dacite[llm-transformers-manual-speech-higgs]
- daily-python[daily]
- daily-python[speech-audio-stream]
- datasets[tts-f5]
- decord[llm-transformers-manual-vision-voice-minicpmo]
- deepgram-sdk[deepgram-asr-processor]
- deep-translator[deep-translator]
- descript-audio-codec[llm-transformers-manual-speech-higgs]
- diffusers[tts-cosy-voice]
- diffusers[llm-transformers-manual-voice-vita]
- diffusers[diffusers]
- diffusers[musetalk-avatar]
- diffusers[llm-transformers-manual-voice-kimi]
- diffusers[tts-step]
- easydict[llm-transformers-manual-vision-deepseek-ocr]
- edge-tts[tts-edge]
- einops[einops]
- einops[codec-bitokenizer]
- einops[llm-transformers-manual-vision-fastvlm]
- einops-exts[llm-transformers-manual-vision-fastvlm]
- einx[codec-bitokenizer]
- ema-pytorch[tts-f5]
- encodec[codec-wavtokenizer]
- eng-to-ipa[tts-openvoicev2]
- fastapi[fastapi-bot-server]
- fastapi[fastapi]
- faster-whisper[whisper-faster-asr]
- fastmcp[mcp]
- ffmpeg-python[musetalk-avatar]
- flash-attn[flash-attn]
- flash-attn[tts-zonos-hybrid]
- flashinfer-python[flashinfer-python]
- funasr[sense-voice-asr]
- funasr[lite-avatar]
- funasr[llm-transformers-manual-voice-vita]
- funasr[tts-step]
- gdown[gdown]
- gdown[llm-transformers-manual-voice-vita]
- geocoder[llm-personalai-proxy]
- google-adk[google-adk]
- google-generativeai[google-ai]
- groq[whisper-groq-asr]
- grpcio[grpc]
- grpcio[rpc]
- grpcio-tools[grpc-tools]
- gtts[tts-g]
- h5py[lite-avatar]
- huggingface-hub[codec-wavtokenizer]
- huggingface-hub[llm-transformers-manual-voice-kimi]
- huggingface-hub[tts-zonos]
- hydra-core[conf]
- hyperpyyaml[tts-step]
- hyperpyyaml[tts-cosy-voice]
- hyperpyyaml[llm-transformers-manual-voice-vita]
- hyperpyyaml[llm-transformers-manual-voice-kimi]
- imageio[musetalk-avatar]
- inflect[tts-openvoicev2]
- inflect[tts-zonos]
- inflect[llm-transformers-manual-voice-vita]
- jieba[tts-f5]
- jieba[tts-openvoicev2]
- jieba[lite-avatar]
- jieba[llm-transformers-manual-speech-higgs]
- jiwer[llm-transformers-manual-voice-vita]
- json-repair[llm-transformers-manual-speech-higgs]
- kanjize[tts-zonos]
- keye-vl-utils[llm-transformers-manual-vision-keye]
- kokoro-onnx[tts-onnx-kokoro]
- langchain[ai-langchain-framework-processor]
- langchain-community[langchain-openai-tidb-vector]
- langchain-openai[langchain-openai-tidb-vector]
- langchain-text-splitters[langchain-openai-tidb-vector]
- langdetect[tts-mega3]
- langid[tts-openvoicev2]
- librosa[lam-audio2expression-avatar]
- librosa[llm-transformers-manual-speech-higgs]
- librosa[librosa]
- lightning[tts-fishspeech]
- lightning[llm-transformers-manual-voice-vita]
- lightning[tts-cosy-voice]
- litellm[litellm]
- livekit[livekit]
- livekit-api[livekit-api]
- llama-cpp-python[llama-cpp]
- loguru[tts-fishspeech]
- loguru[llm-transformers-manual-speech-higgs]
- loguru[llm-transformers-manual-voice-kimi]
- loralib[tts-fishspeech]
- mamba-ssm[tts-zonos-hybrid]
- markdown2[llm-transformers-manual-vision-fastvlm]
- matplotlib[llm-transformers-manual-voice-vita]
- matplotlib[matplotlib]
- matplotlib[tts-f5]
- mcp[mcp]
- mem0ai[mem0]
- mlx-whisper[whisper-mlx-asr]
- modelscope[tts-step]
- modelscope[llm-transformers-manual-voice-vita]
- modelscope[tts-cosy-voice]
- moshi[codec-moshi-mimi]
- moshi[moshi-voice-processor]
- moviepy[llm-transformers-manual-vision-voice-minicpmo]
- moviepy[musetalk-avatar]
- munch[tts-kokoro]
- natsort[tts-fishspeech]
- natsort[llm-transformers-manual-voice-vita]
- nemo-text-processing[tts-chat]
- nest-asyncio[ngrok-proxy]
- num2words[llm-transformers-manual-vision-smolvlm]
- numpy[llm-transformers-manual-vision-fastvlm]
- numpy[lite-avatar]
- numpy[codec-bitokenizer]
- numpy[tts-f5]
- numpy[llm-transformers-manual-vision-voice-qwen]
- numpy[tts-orpheus]
- numpy[llm-transformers-manual-vision-kimi]
- numpy[codec-wavtokenizer]
- numpy[lam-audio2expression-avatar]
- omegaconf[codec-bitokenizer]
- omegaconf[llm-transformers-manual-speech-higgs]
- omegaconf[lam-audio2expression-avatar]
- omegaconf[conf]
- onnx[sense-voice-asr]
- onnxconverter-common[sense-voice-asr]
- onnxruntime[tts-cosy-voice]
- onnxruntime[lite-avatar]
- onnxruntime[silero-vad]
- onnxruntime-gpu[lite-avatar-gpu]
- onnxruntime-gpu[tts-step]
- openai[img-processor]
- openai[openai]
- openai-whisper[tts-step]
- openai-whisper[whisper-asr]
- openai-whisper[llm-transformers-manual-voice-vita]
- openai-whisper[llm-transformers-manual-voice-kimi]
- openai-whisper[tts-cosy-voice]
- openai-whisper[tts-mega3]
- opencv-python[opencv]
- opencv-python-headless[lam-audio2expression-avatar]
- packaging[codec-bitokenizer]
- pandas[llm-transformers-manual-voice-kimi]
- pandas[llm-transformers-manual-speech-higgs]
- peft[llm-transformers-manual-vision-speech-phi]
- peft[llm-transformers-manual-vision-fastvlm]
- phonemizer[tts-kokoro]
- phonemizer[tts-zonos]
- protobuf[lam-audio2expression-avatar]
- protobuf[tts-step]
- pvporcupine[porcupine-wakeword]
- pyannote-audio[pyannote-vad]
- pyaudio[speech-audio-stream]
- pyaudio[pyaudio-stream]
- pybase16384[tts-chat]
- pyee[pyee]
- pyloudnorm[tts-mega3]
- pymysql[langchain-openai-tidb-vector]
- pyngrok[ngrok-proxy]
- pynini[tts-chat]
- pypinyin[tts-openvoicev2]
- pypinyin[tts-f5]
- pypinyin[lite-avatar]
- pyrootutils[tts-fishspeech]
- pytest[test]
- pytest-mock[test]
- pytorch-lightning[tts-fishspeech]
- pyttsx3[tts-pyttsx3]
- pytube[pytube]
- pywhispercpp[whisper-cpp]
- pyworld[llm-transformers-manual-voice-vita]
- pyyaml[codec-wavtokenizer]
- pyyaml[yaml]
- qwen-omni-utils[llm-transformers-manual-vision-speech-phi]
- qwen-omni-utils[llm-transformers-manual-vision-voice-qwen]
- qwen-vl-utils[llm-transformers-manual-vision]
- redis[redis]
- rich[llm-transformers-manual-voice-vita]
- rich[tts-fishspeech]
- safetensors[codec-bitokenizer]
- safetensors[llm-transformers-manual-voice-vita]
- scikit-learn[llm-transformers-manual-vision-fastvlm]
- sentencepiece[llm-transformers-manual-vision-deepseekvl2]
- sentencepiece[llm-transformers-manual-vision-fastvlm]
- sentencepiece[llm-transformers-manual-vision-img-janus]
- sentencepiece[tts-step]
- sentence-transformers[test]
- setproctitle[tts-mega3]
- sglang[sglang]
- shortuuid[llm-transformers-manual-vision-fastvlm]
- six[llm-transformers-manual-voice-kimi]
- six[tts-step]
- snac[codec-snac]
- soundfile[codec-bitokenizer]
- soundfile[llm-transformers-manual-speech-higgs]
- soundfile[soundfile]
- sox[llm-transformers-manual-voice-kimi]
- sox[tts-step]
- soxr[codec-bitokenizer]
- sudachidict-full[tts-zonos]
- sudachipy[tts-zonos]
- supervision[vision-yolo-detector]
- tensorflow[musetalk-avatar]
- tensorrt[tensorrt]
- tensorrt-llm[trtllm]
- termcolor[lam-audio2expression-avatar]
- tidb-vector[langchain-openai-tidb-vector]
- tiktoken[llm-transformers-manual-voice-vita]
- tiktoken[tiktoken]
- tiktoken[tts-fishspeech]
- timm[llm-transformers-manual-vision-deepseekvl2]
- timm[llm-transformers-manual-vision-fastvlm]
- timm[llm-transformers-manual-vision-img-janus]
- timm[llm-transformers-manual-vision-skyworkr1v]
- timm[llm-transformers-manual-vision-speech-gemma]
- timm[llm-transformers-manual-voice-kimi]
- together[together-ai]
- tokenizers[llm-transformers-manual-vision-deepseek-ocr]
- tokenizers[llm-transformers-manual-vision-fastvlm]
- tomli[tts-f5]
- torch[codec-bitokenizer]
- torch[llm-transformers-manual-speech-higgs]
- torch[llm-transformers-manual-vision-deepseek-ocr]
- torch[llm-transformers-manual-vision-voice-minicpmo]
- torch[llm-transformers-manual-voice]
- torch[llm-transformers-manual-voice-kimi]
- torch[sense-voice-asr]
- torch[smart-turn]
- torch[torch-vision-audio]
- torch[tts-chat]
- torch[tts-cosy-voice]
- torch[tts-f5]
- torch[tts-fishspeech]
- torch[tts-kokoro]
- torch[tts-mega3]
- torch[tts-orpheus]
- torch[tts-step]
- torch[tts-zonos]
- torchaudio[codec-bitokenizer]
- torchaudio[llm-transformers-manual-speech-higgs]
- torchaudio[llm-transformers-manual-vision-deepseek-ocr]
- torchaudio[llm-transformers-manual-vision-voice-minicpmo]
- torchaudio[llm-transformers-manual-voice]
- torchaudio[llm-transformers-manual-voice-kimi]
- torchaudio[sense-voice-asr]
- torchaudio[torch-vision-audio]
- torchaudio[tts-cosy-voice]
- torchaudio[tts-f5]
- torchaudio[tts-fishspeech]
- torchaudio[tts-mega3]
- torchaudio[tts-orpheus]
- torchaudio[tts-step]
- torchaudio[tts-zonos]
- torchdiffeq[llm-transformers-manual-vision-voice-qwen]
- torchdiffeq[tts-f5]
- torchdiffeq[tts-mega3]
- torchdyn[llm-transformers-manual-voice-kimi]
- torchvision[codec-bitokenizer]
- torchvision[llm-transformers-manual-vision-deepseek-ocr]
- torchvision[llm-transformers-manual-vision-voice-minicpmo]
- torchvision[torch-vision-audio]
- torchvision[tts-step]
- tqdm[llm-transformers-manual-voice-kimi]
- tqdm[local-terminal-chat-bot]
- transformers[codec-bitokenizer]
- transformers[codec-transformers-dac]
- transformers[codec-transformers-mimi]
- transformers[lam-audio2expression-avatar]
- transformers[lite-avatar]
- transformers[llm-transformers-manual-speech-higgs]
- transformers[llm-transformers-manual-vision]
- transformers[llm-transformers-manual-vision-deepseek-ocr]
- transformers[llm-transformers-manual-vision-deepseekvl2]
- transformers[llm-transformers-manual-vision-fastvlm]
- transformers[llm-transformers-manual-vision-speech-phi]
- transformers[llm-transformers-manual-vision-voice-minicpmo]
- transformers[llm-transformers-manual-voice]
- transformers[llm-transformers-manual-voice-kimi]
- transformers[llm-transformers-manual-voice-vita]
- transformers[musetalk-avatar]
- transformers[smart-turn]
- transformers[transformers]
- transformers[tts-chat]
- transformers[tts-cosy-voice]
- transformers[tts-f5]
- transformers[tts-fishspeech]
- transformers[tts-kokoro]
- transformers[tts-mega3]
- transformers[tts-orpheus]
- transformers[tts-step]
- transformers[tts-zonos]
- transformers[whisper-transformers-asr]
- tts[tts-coqui]
- typeguard[lite-avatar]
- ultralytics[vision-yolo-detector]
- unidecode[tts-openvoicev2]
- uvicorn[fastapi-bot-server]
- vector-quantize-pytorch[lite-avatar]
- vector-quantize-pytorch[llm-transformers-manual-speech-higgs]
- vector-quantize-pytorch[llm-transformers-manual-vision-voice-minicpmo]
- vector-quantize-pytorch[tts-chat]
- vector-quantize-pytorch[tts-fishspeech]
- verovio[verovio]
- vllm[vllm]
- vocos[lite-avatar]
- vocos[llm-transformers-manual-vision-voice-minicpmo]
- vocos[tts-chat]
- vocos[tts-f5]
- wandb[tts-f5]
- wavmark[tts-openvoicev2]
- webrtcvad[webrtcvad]
- websockets[cartesia-tts-processor]
- websockets[websocket]
- wetextprocessing[llm-transformers-manual-voice-vita]
- wetextprocessing[tts-chat]
- wetextprocessing[tts-cosy-voice]
- wetextprocessing[tts-mega3]
- wget[llm-transformers-manual-voice-vita]
- wget[tts-cosy-voice]
- whisper-timestamped[whisper-timestamped-asr]
- word2number[llm-transformers-manual-voice-vita]
- xcodec2[codec-xcodec2]
- xformers[llm-transformers-manual-vision-deepseekvl2]
- x-transformers[llm-transformers-manual-vision-voice-qwen]
- x-transformers[tts-f5]
- x-transformers[tts-mega3]
- yapf[lam-audio2expression-avatar]
- zhon[llm-transformers-manual-voice-vita]