Package Insights
((week_daily_avg - month_daily_avg) / month_daily_avg) * 100
Weekly Downloads
GitHub Stars
Downloads by OS
Python Versions
Top Countries
Dependencies
- anyascii >=0.3.2
- blingfire ==0.1.8
- boto3
- charset-normalizer >=3.2.0
- fasttext-wheel ==0.9.2
- fsspec >=2023.6.0
- jq
- jsonpath-ng
- msgspec >=0.14.2
- necessary >=0.4.3
- nltk >=3.9.1
- numpy <2
- omegaconf >=2.3.0
- platformdirs >=4.2.0
- python-dotenv >=0.19.0
- pyyaml
- requests
- rich
- s3fs ==2023.6.0
- smart-open >=7.0.4
- tokenizers <=0.19.1,>=0.15.0
- tqdm
- uniseg
- zstandard >=0.23.0
32 optional dependencies
- beautifulsoup4[code]
- black[dev]
- brotli[trafilatura]
- cchardet[trafilatura]
- detect-secrets[code]
- dolma[trafilatura]
- dolma[all]
- dolma[resiliparse]
- fasttext-wheel[lang]
- fastwarc[warc]
- faust-cchardet[trafilatura]
- flake8[dev]
- flake8-pyi[dev]
- flake8-pyproject[dev]
- htmldate[trafilatura]
- ipdb[dev]
- ipython[dev]
- isort[dev]
- lingua-language-detector[lang]
- mypy[dev]
- py3langid[trafilatura]
- pycld2[lang]
- pygments[code]
- pytest[dev]
- regex[pii]
- regex[code]
- resiliparse[resiliparse]
- trafilatura[trafilatura]
- types-dateparser[dev]
- types-pyyaml[dev]
- url-normalize[warc]
- w3lib[warc]