| Afrikaans |
charset_table=non_cont |
af |
- |
|
| Arabic |
charset_table=non_cont |
ar |
morphology=stem_ar (Arabic stemmer); morphology=libstemmer_ar |
|
| Armenian |
charset_table=non_cont |
hy |
- |
|
| Assamese |
specify charset_table manually |
- |
- |
|
| Basque |
charset_table=non_cont |
eu |
- |
|
| Bengali |
charset_table=non_cont |
bn |
- |
|
| Bishnupriya |
specify charset_table manually |
- |
- |
|
| Buhid |
specify charset_table manually |
- |
- |
|
| Bulgarian |
charset_table=non_cont |
bg |
- |
|
| Catalan |
charset_table=non_cont |
ca |
morphology=libstemmer_ca |
|
| Chinese using ICU |
charset_table=chinese |
zh |
morphology=icu_chinese |
More accurate than using ngrams |
| Chinese using Jieba |
charset_table=chinese |
zh |
morphology=jieba_chinese, requires package manticore-language-packs |
More accurate than using ngrams |
| Chinese using ngrams |
ngram_chars=chinese |
zh |
ngram_chars=chinese ngram_len=1 |
Faster indexing, but the search performance might not be as good |
| Croatian |
charset_table=non_cont |
hr |
- |
|
| Kurdish |
charset_table=non_cont |
ckb |
- |
|
| Czech |
charset_table=non_cont |
cz |
morphology=stem_cz (Czech stemmer) |
|
| Danish |
charset_table=non_cont |
da |
morphology=libstemmer_da |
|
| Dutch |
charset_table=non_cont |
nl |
morphology=libstemmer_nl |
|
| English |
charset_table=non_cont |
en |
morphology=lemmatize_en (single root form); morphology=lemmatize_en_all (all root forms); morphology=stem_en (Porter's English stemmer); morphology=stem_enru (Porter's English and Russian stemmers); morphology=libstemmer_en (English from libstemmer) |
|
| Esperanto |
charset_table=non_cont |
eo |
- |
|
| Estonian |
charset_table=non_cont |
et |
- |
|
| Finnish |
charset_table=non_cont |
fi |
morphology=libstemmer_fi |
|
| French |
charset_table=non_cont |
fr |
morphology=libstemmer_fr |
|
| Galician |
charset_table=non_cont |
gl |
- |
|
| Garo |
specify charset_table manually |
- |
- |
|
| German |
charset_table=non_cont |
de |
morphology=lemmatize_de (single root form); morphology=lemmatize_de_all (all root forms); morphology=libstemmer_de |
|
| Greek |
charset_table=non_cont |
el |
morphology=libstemmer_el |
|
| Hebrew |
charset_table=non_cont |
he |
- |
|
| Hindi |
charset_table=non_cont |
hi |
morphology=libstemmer_hi |
|
| Hmong |
specify charset_table manually |
- |
- |
|
| Ho |
specify charset_table manually |
- |
- |
|
| Hungarian |
charset_table=non_cont |
hu |
morphology=libstemmer_hu |
|
| Indonesian |
charset_table=non_cont |
id |
morphology=libstemmer_id |
|
| Irish |
charset_table=non_cont |
ga |
morphology=libstemmer_ga |
|
| Italian |
charset_table=non_cont |
it |
morphology=libstemmer_it |
|
| Japanese |
ngram_chars=japanese |
- |
ngram_chars=japanese ngram_len=1 |
Requires ngram-based segmentation |
| Komi |
specify charset_table manually |
- |
- |
|
| Korean |
ngram_chars=korean |
- |
ngram_chars=korean ngram_len=1 |
Requires ngram-based segmentation |
| Large Flowery Miao |
specify charset_table manually |
- |
- |
|
| Latin |
charset_table=non_cont |
la |
- |
|
| Latvian |
charset_table=non_cont |
lv |
- |
|
| Lithuanian |
charset_table=non_cont |
lt |
morphology=libstemmer_lt |
|
| Maba |
specify charset_table manually |
- |
- |
|
| Maithili |
specify charset_table manually |
- |
- |
|
| Marathi |
specify charset_table manually |
- |
- |
|
| Marathi |
charset_table=non_cont |
mr |
- |
|
| Mende |
specify charset_table manually |
- |
- |
|
| Mru |
specify charset_table manually |
- |
- |
|
| Myene |
specify charset_table manually |
- |
- |
|
| Nepali |
specify charset_table manually |
- |
morphology=libstemmer_ne |
|
| Ngambay |
specify charset_table manually |
- |
- |
|
| Norwegian |
charset_table=non_cont |
no |
morphology=libstemmer_no |
|
| Odia |
specify charset_table manually |
- |
- |
|
| Persian |
charset_table=non_cont |
fa |
- |
|
| Polish |
charset_table=non_cont |
pl |
- |
|
| Portuguese |
charset_table=non_cont |
pt |
morphology=libstemmer_pt |
|
| Romanian |
charset_table=non_cont |
ro |
morphology=libstemmer_ro |
|
| Russian |
charset_table=non_cont |
ru |
morphology=lemmatize_ru (single root form); morphology=lemmatize_ru_all (all root forms); morphology=stem_ru (Porter's Russian stemmer); morphology=stem_enru (Porter's English and Russian stemmers); morphology=libstemmer_ru (from libstemmer) |
|
| Santali |
specify charset_table manually |
- |
- |
|
| Sindhi |
specify charset_table manually |
- |
- |
|
| Slovak |
charset_table=non_cont |
sk |
- |
|
| Slovenian |
charset_table=non_cont |
sl |
- |
|
| Somali |
charset_table=non_cont |
so |
- |
|
| Sotho |
charset_table=non_cont |
st |
- |
|
| Spanish |
charset_table=non_cont |
es |
morphology=libstemmer_es |
|
| Swahili |
charset_table=non_cont |
sw |
- |
|
| Swedish |
charset_table=non_cont |
sv |
morphology=libstemmer_sv |
|
| Sylheti |
specify charset_table manually |
- |
- |
|
| Tamil |
specify charset_table manually |
- |
morphology=libstemmer_ta |
|
| Thai |
charset_table=thai |
th |
- |
|
| Turkish |
charset_table=non_cont |
tr |
morphology=libstemmer_tr |
|
| Ukrainian |
charset_table=non_cont,U+0406->U+0456,U+0456,U+0407->U+0457,U+0457,U+0490->U+0491,U+0491 |
- |
morphology=lemmatize_uk_all |
Requires installation of the Ukrainian (UK) lemmatizer |
| Yoruba |
charset_table=non_cont |
yo |
- |
|
| Zulu |
charset_table=non_cont |
zu |
- |
|