Sefaria-Project
Sefaria-Project copied to clipboard
word_form.language_code has lots of inconsistent values
> db.word_form.distinct("language_code")
[
" Ar. ",
" Ar.)",
" Ar.,",
" b. h.)",
" b. h.,",
" b. h.;",
" ch. ",
" ch. =",
" ch. = (b.",
" ch. = <a class=\"refLink\" href=\"/Jastrow,_אֲלוּמָּה.1\" data-ref=\"Jastrow, אֲלוּמָּה 1\">next w.</a>",
" ch. = <a class=\"refLink\" href=\"/Jastrow,_אַמְהוּת.1\" data-ref=\"Jastrow, אַמְהוּת 1\">next w.</a>",
" ch. = <a class=\"refLink\" href=\"/Jastrow,_אַרְנְקַי.1\" data-ref=\"Jastrow, אַרְנְקַי 1\">next w.</a>",
" ch. = <a class=\"refLink\" href=\"/Jastrow,_בִּירָנִית.1\" data-ref=\"Jastrow, בִּירָנִית 1\">preced.</a>",
" ch. = <a class=\"refLink\" href=\"/Jastrow,_גּוּלְגֹּלֶת.1\" data-ref=\"Jastrow, גּוּלְגֹּלֶת 1\">next w.</a>",
" ch. = <a class=\"refLink\" href=\"/Jastrow,_זָמַם II.1\" data-ref=\"Jastrow, זָמַם II 1\">same</a>",
" ch. = <a class=\"refLink\" href=\"/Jastrow,_חָכִיר.1\" data-ref=\"Jastrow, חָכִיר 1\">same</a>.",
" ch. = <a class=\"refLink\" href=\"/Jastrow,_כְּנַעֲנִי.1\" data-ref=\"Jastrow, כְּנַעֲנִי 1\">next w.</a>",
" ch. = <a class=\"refLink\" href=\"/Jastrow,_כַּשְׂדִּי.1\" data-ref=\"Jastrow, כַּשְׂדִּי 1\">next w.</a>",
" ch. = <a class=\"refLink\" href=\"/Jastrow,_מְחִלָּה.1\" data-ref=\"Jastrow, מְחִלָּה 1\">next w.</a>",
" ch. = <a class=\"refLink\" href=\"/Jastrow,_מְיַלֶּדֶת.1\" data-ref=\"Jastrow, מְיַלֶּדֶת 1\">next w.</a>",
" ch. = <a class=\"refLink\" href=\"/Jastrow,_מְלִילָה.1\" data-ref=\"Jastrow, מְלִילָה 1\">next w.</a>",
" ch. = <a class=\"refLink\" href=\"/Jastrow,_מַחְפּוֹרֶת.1\" data-ref=\"Jastrow, מַחְפּוֹרֶת 1\">next w.</a>",
" ch. = <a class=\"refLink\" href=\"/Jastrow,_מוֹאָבִי.1\" data-ref=\"Jastrow, מוֹאָבִי 1\">next w.</a>",
" ch. = <a class=\"refLink\" href=\"/Jastrow,_נְטוּלָה.1\" data-ref=\"Jastrow, נְטוּלָה 1\">next w.</a>",
" ch. = <a class=\"refLink\" href=\"/Jastrow,_נִשְׁבִּים.1\" data-ref=\"Jastrow, נִשְׁבִּים 1\">next w.</a>",
" ch. = <a class=\"refLink\" href=\"/Jastrow,_פְּלִשְׁתִּי.1\" data-ref=\"Jastrow, פְּלִשְׁתִּי 1\">next w.</a>",
" ch. = <a class=\"refLink\" href=\"/Jastrow,_פַּקּוּעָה.1\" data-ref=\"Jastrow, פַּקּוּעָה 1\">next w.</a>",
" ch. = <a dir=\"rtl\" class=\"refLink\" href=\"/Jastrow,_אוֹפֶן.1\" data-ref=\"Jastrow, אוֹפֶן 1\">אוֹפֶן</a>.",
" ch. = <a dir=\"rtl\" class=\"refLink\" href=\"/Jastrow,_אוּר I.1\" data-ref=\"Jastrow, אוּר I 1\">אוּר</a>",
" ch. = <a dir=\"rtl\" class=\"refLink\" href=\"/Jastrow,_דּוּכּוֹס.1\" data-ref=\"Jastrow, דּוּכּוֹס 1\">דּוּכּוֹס</a>.",
" ch. = <a dir=\"rtl\" class=\"refLink\" href=\"/Jastrow,_הוֹרָאָה.1\" data-ref=\"Jastrow, הוֹרָאָה 1\">הוֹרָאָה</a>",
" ch. = <a dir=\"rtl\" class=\"refLink\" href=\"/Jastrow,_זֶמֶר I.1\" data-ref=\"Jastrow, זֶמֶר I 1\">זֶמֶר</a>",
" ch. = <a dir=\"rtl\" class=\"refLink\" href=\"/Jastrow,_חֲצָב I.1\" data-ref=\"Jastrow, חֲצָב I 1\">חֲצָב</a>",
" ch. = <a dir=\"rtl\" class=\"refLink\" href=\"/Jastrow,_לִיסְטֵיס.1\" data-ref=\"Jastrow, לִיסְטֵיס 1\">ליסְטֵיס</a>",
" ch. = <a dir=\"rtl\" class=\"refLink\" href=\"/Jastrow,_מַס I.1\" data-ref=\"Jastrow, מַס I 1\">מַס</a>",
" ch. = <a dir=\"rtl\" class=\"refLink\" href=\"/Jastrow,_מוּעָד.1\" data-ref=\"Jastrow, מוּעָד 1\">מוּעֶדֶת</a>",
" ch. = <a dir=\"rtl\" class=\"refLink\" href=\"/Jastrow,_נִצְבָּא.1\" data-ref=\"Jastrow, נִצְבָּא 1\">נִצְבָּא</a>",
" ch. = <a dir=\"rtl\" class=\"refLink\" href=\"/Jastrow,_עִנְּבָא.1\" data-ref=\"Jastrow, עִנְּבָא 1\">עִנְּבָא</a>",
" ch. = <a dir=\"rtl\" class=\"refLink\" href=\"/Jastrow,_עִקְבָא.1\" data-ref=\"Jastrow, עִקְבָא 1\">עִקְבָא</a>",
" ch. = <a dir=\"rtl\" class=\"refLink\" href=\"/Jastrow,_פַּח I.1\" data-ref=\"Jastrow, פַּח I 1\">פַּח</a>",
" ch. = <a dir=\"rtl\" class=\"refLink\" href=\"/Jastrow,_קְפַץ.1\" data-ref=\"Jastrow, קְפַץ 1\">קְפַץ</a>",
" ch. = <a dir=\"rtl\" class=\"refLink\" href=\"/Jastrow,_קַנְקָן I.1\" data-ref=\"Jastrow, קַנְקָן I 1\">קַנְקָן</a>",
" ch. = <span dir=\"rtl\">בְּהוֹן</span>",
" ch. = b.",
" ch. = h.",
" ch.,",
" ch.;",
" h. ",
" h. = <a dir=\"rtl\" class=\"refLink\" href=\"/Jastrow,_סֵיבוּ.1\" data-ref=\"Jastrow, סֵיבוּ 1\">סֵיבוּ</a>",
" h. = ch.",
" h. a. ch.",
" h.,",
"(Ar. ",
"(NH",
"(PBH",
"(b. h. ",
"(b. h. = <a dir=\"rtl\" class=\"refLink\" href=\"/Jastrow,_זֶה.1\" data-ref=\"Jastrow, זֶה 1\">את</a>",
"(b. h. = <span dir=\"rtl\">חתתית</span>",
"(b. h. = <span dir=\"rtl\">קלקל׳</span>",
"(b. h.)",
"(b. h.,",
"(b. h.;",
"(ch. ",
"(h. ",
"BH,",
"FW",
"FW,",
"MH",
"MH,",
"NH",
"NH)",
"PBH",
"PBH,",
"PBH.",
"PN FW",
"PN NH",
"arc",
"ch.",
"eng",
"heb",
"x-pn"
]
Can these be normalized into something more useful?
You're right, this field is currently a mess. We hope to normalize it in the future, although that's not currently a priority. Feel free to open a pull request if you think you can write a script to clean it up.