Package Insights
((week_daily_avg - month_daily_avg) / month_daily_avg) * 100
Weekly Downloads
GitHub Stars
Downloads by OS
Python Versions
Top Countries
Dependencies
- absl-py <3.0.0,>=2.0.0
- comment-parser
- cosmos-xenna ==0.1.2
- fsspec
- hydra-core
- jieba ==0.42.1
- loguru
- mecab-python3
- omegaconf
- openai >=1.0.0
- pandas >=2.1.0
- pyarrow
- ray >=2.50
- torch
- transformers
42 optional dependencies
- av[video-cpu]
- beautifulsoup4[text-cpu]
- cudf-cu12[deduplication-cuda12]
- cuml-cu12[deduplication-cuda12]
- cvcuda-cu12[video-cuda12]
- easydict[video-cpu]
- einops[video-cpu]
- fasttext[text-cpu]
- flash-attn[video-cuda12]
- ftfy[text-cpu]
- gpustat[cuda12]
- justext[text-cpu]
- lxml[text-cpu]
- mwparserfromhell[text-cpu]
- nemo-curator[all]
- nemo-curator[audio-cuda12]
- nemo-curator[text-cuda12]
- nemo-curator[image-cuda12]
- nemo-curator[video-cuda12]
- nemo-toolkit[audio-cpu]
- nvidia-dali-cuda120[image-cuda12]
- nvidia-ml-py[cuda12]
- opencv-python[video-cpu]
- peft[text-cpu]
- pycld2[text-cpu]
- pycuda[video-cuda12]
- pylibcugraph-cu12[deduplication-cuda12]
- pylibraft-cu12[deduplication-cuda12]
- pynvvideocodec[video-cuda12]
- raft-dask-cu12[deduplication-cuda12]
- rapidsmpf-cu12[deduplication-cuda12]
- resiliparse[text-cpu]
- s5cmd[text-cpu]
- scikit-learn[deduplication-cuda12]
- sentencepiece[text-cpu]
- torch[video-cuda12]
- torchaudio[video-cuda12]
- torchvision[image-cpu]
- torchvision[video-cpu]
- trafilatura[text-cpu]
- vllm[vllm]
- warcio[text-cpu]