This repository has been archived by the owner on Jul 11, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #24 from frictionlessdata/add-language-codes-
add ISO-639-1 language codes WIP
- Loading branch information
Showing
4 changed files
with
322 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
ISO 639-1 assigns a two-letter code to each language. | ||
|
||
## Data | ||
|
||
Data is derived from https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes; columns for non-ISO-639-1 standards have been dropped. | ||
|
||
Data sourced on 2018-02-24. This data will not be updated. Do not use the data for analysis. | ||
|
||
This data package implements the [Language support](https://frictionlessdata.io/specs/patterns/#language-support) pattern. Some properties in the [data package](datapackage.json) have been given both English and Spanish values. | ||
|
||
This pattern is not implemented in data package validation tools. The CSV file has been validated using https://try.goodtables.io | ||
|
||
## License | ||
|
||
Data derived from [List of ISO 639-1 codes](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) by [Wikipedia](https://wikimediafoundation.org/wiki/Home) is licensed under [Creative Commons Attribution-ShareAlike License](http://creativecommons.org/licenses/by-sa/3.0/) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
ISO language name,Native name (endonym),639-1,Notes | ||
Abkhazian,"аҧсуа бызшәа, аҧсшәа",ab,also known as Abkhaz | ||
Afar,Afaraf,aa, | ||
Afrikaans,Afrikaans,af, | ||
Akan,Akan,ak,"macrolanguage, Twi is [tw/twi], Fanti is [fat]" | ||
Albanian,Shqip,sq,"macrolanguage, ""Albanian Phylozone"" in 639-6" | ||
Amharic,አማርኛ,am, | ||
Arabic,العربية,ar,"macrolanguage, Standard Arabic is [arb]" | ||
Aragonese,aragonés,an, | ||
Armenian,Հայերեն,hy, | ||
Assamese,অসমীয়া,as, | ||
Avaric,"авар мацӀ, магӀарул мацӀ",av,also known as Avar | ||
Avestan,avesta,ae,ancient | ||
Aymara,aymar aru,ay,macrolanguage | ||
Azerbaijani,azərbaycan dili,az,macrolanguage | ||
Bambara,bamanankan,bm, | ||
Bashkir,башҡорт теле,ba, | ||
Basque,"euskara, euskera",eu, | ||
Belarusian,беларуская мова,be, | ||
Bengali,বাংলা,bn,also known as Bangla | ||
Bihari languages,भोजपुरी,bh,"collective language code for Bhojpuri, Magahi, and Maithili" | ||
Bislama,Bislama,bi,"Language formed from English and Ni-Vanuatu, with some French influence." | ||
Bosnian,bosanski jezik,bs, | ||
Breton,brezhoneg,br, | ||
Bulgarian,български език,bg, | ||
Burmese,ဗမာစာ,my, | ||
"Catalan, Valencian","català, valencià",ca, | ||
Central Khmer,"ខ្មែរ, ខេមរភាសា, ភាសាខ្មែរ",km,also known as Khmer or Cambodian | ||
Chamorro,Chamoru,ch, | ||
Chechen,нохчийн мотт,ce, | ||
"Chichewa, Chewa, Nyanja","chiCheŵa, chinyanja",ny, | ||
Chinese,"中文 (Zhōngwén), 汉语, 漢語",zh,macrolanguage | ||
"Church Slavic, Church Slavonic, Old Church Slavonic, Old Slavonic, Old Bulgarian",ѩзыкъ словѣньскъ,cu,"ancient, in use by Orthodox Church" | ||
Chuvash,чӑваш чӗлхи,cv, | ||
Cornish,Kernewek,kw, | ||
Corsican,"corsu, lingua corsa",co, | ||
Cree,ᓀᐦᐃᔭᐍᐏᐣ,cr,macrolanguage | ||
Croatian,hrvatski jezik,hr, | ||
Czech,"čeština, český jazyk",cs, | ||
Danish,dansk,da, | ||
"Divehi, Dhivehi, Maldivian",ދިވެހި,dv, | ||
"Dutch, Flemish","Nederlands, Vlaams",nl, | ||
Dzongkha,རྫོང་ཁ,dz, | ||
English,English,en, | ||
Esperanto,Esperanto,eo,"constructed, initiated from L.L. Zamenhof, 1887" | ||
Estonian,"eesti, eesti keel",et,macrolanguage | ||
Ewe,Eʋegbe,ee, | ||
Faroese,føroyskt,fo, | ||
Fijian,vosa Vakaviti,fj, | ||
Finnish,"suomi, suomen kieli",fi, | ||
French,"français, langue française",fr, | ||
Fulah,"Fulfulde, Pulaar, Pular",ff,"macrolanguage, also known as Fula" | ||
"Gaelic, Scottish Gaelic",Gàidhlig,gd, | ||
Galician,Galego,gl, | ||
Ganda,Luganda,lg, | ||
Georgian,ქართული,ka, | ||
German,Deutsch,de, | ||
Greek (modern),ελληνικά,el, | ||
Guaraní,Avañe'ẽ,gn,macrolanguage | ||
Gujarati,ગુજરાતી,gu, | ||
"Haitian, Haitian Creole",Kreyòl ayisyen,ht, | ||
Hausa,(Hausa) هَوُسَ,ha, | ||
Hebrew (modern),עברית,he, | ||
Herero,Otjiherero,hz, | ||
Hindi,"हिन्दी, हिंदी",hi, | ||
Hiri Motu,Hiri Motu,ho, | ||
Hungarian,magyar,hu, | ||
Icelandic,Íslenska,is, | ||
Ido,Ido,io,"constructed by De Beaufront, 1907, as variation of Esperanto" | ||
Igbo,Asụsụ Igbo,ig, | ||
Indonesian,Bahasa Indonesia,id,Covered by macrolanguage [ms/msa] | ||
Interlingua,Interlingua,ia,constructed by International Auxiliary Language Association | ||
Interlingue,Originally called Occidental; then Interlingue after WWII,ie,"constructed by Edgar de Wahl, first published in 1922" | ||
Inuktitut,ᐃᓄᒃᑎᑐᑦ,iu,macrolanguage | ||
Inupiaq,"Iñupiaq, Iñupiatun",ik,macrolanguage | ||
Irish,Gaeilge,ga, | ||
Italian,Italiano,it, | ||
Japanese,日本語 (にほんご),ja, | ||
Javanese,"ꦧꦱꦗꦮ, Basa Jawa",jv, | ||
"Kalaallisut, Greenlandic","kalaallisut, kalaallit oqaasii",kl, | ||
Kannada,ಕನ್ನಡ,kn, | ||
Kanuri,Kanuri,kr,macrolanguage | ||
Kashmiri,"कश्मीरी, كشميري",ks, | ||
Kazakh,қазақ тілі,kk, | ||
"Kikuyu, Gikuyu",Gĩkũyũ,ki, | ||
Kinyarwanda,Ikinyarwanda,rw, | ||
"Kirghiz, Kyrgyz","Кыргызча, Кыргыз тили",ky, | ||
Komi,коми кыв,kv,macrolanguage | ||
Kongo,Kikongo,kg,macrolanguage | ||
Korean,한국어,ko, | ||
"Kuanyama, Kwanyama",Kuanyama,kj, | ||
Kurdish,"Kurdî, كوردی",ku,macrolanguage | ||
Lao,ພາສາລາວ,lo, | ||
Latin,"latine, lingua latina",la,ancient | ||
Latvian,Latviešu Valoda,lv,macrolanguage | ||
"Limburgan, Limburger, Limburgish",Limburgs,li, | ||
Lingala,Lingála,ln, | ||
Lithuanian,lietuvių kalba,lt, | ||
Luba-Katanga,Kiluba,lu,also known as Luba-Shaba | ||
"Luxembourgish, Letzeburgesch",Lëtzebuergesch,lb, | ||
Macedonian,македонски јазик,mk, | ||
Malagasy,fiteny malagasy,mg,macrolanguage | ||
Malay,"Bahasa Melayu, بهاس ملايو",ms,"macrolanguage, Standard Malay is [zsm], Indonesian is [id/ind]" | ||
Malayalam,മലയാളം,ml, | ||
Maltese,Malti,mt, | ||
Manx,"Gaelg, Gailck",gv, | ||
Maori,te reo Māori,mi,also known as Māori | ||
Marathi,मराठी,mr,also known as Marāṭhī | ||
Marshallese,Kajin M̧ajeļ,mh, | ||
Mongolian,Монгол хэл,mn,macrolanguage | ||
Nauru,Dorerin Naoero,na,also known as Nauruan | ||
"Navajo, Navaho",Diné bizaad,nv, | ||
Ndonga,Owambo,ng, | ||
Nepali,नेपाली,ne, | ||
North Ndebele,isiNdebele,nd,also known as Northern Ndebele | ||
Northern Sami,Davvisámegiella,se, | ||
Norwegian,Norsk,no,"macrolanguage, Bokmål is [nb/nob], Nynorsk is [nn/nno]" | ||
Norwegian Bokmål,Norsk Bokmål,nb,Covered by macrolanguage [no/nor] | ||
Norwegian Nynorsk,Norsk Nynorsk,nn,Covered by macrolanguage [no/nor] | ||
Occitan,"occitan, lenga d'òc",oc, | ||
Ojibwa,ᐊᓂᔑᓈᐯᒧᐎᓐ,oj,"macrolanguage, also known as Ojibwe" | ||
Oriya,ଓଡ଼ିଆ,or,also known as Odia | ||
Oromo,Afaan Oromoo,om,macrolanguage | ||
"Ossetian, Ossetic",ирон æвзаг,os, | ||
Pali,पाऴि,pi,"ancient, also known as Pāli" | ||
"Panjabi, Punjabi",ਪੰਜਾਬੀ,pa, | ||
"Pashto, Pushto",پښتو,ps,macrolanguage | ||
Persian,فارسی,fa,"macrolanguage, also known as Farsi" | ||
Polabian,"wenske rec, Wenske",pox, | ||
Polish,"język polski, Polszczyzna",pl, | ||
Portuguese,Português,pt, | ||
Quechua,"Runa Simi, Kichwa",qu,macrolanguage | ||
"Romanian, Moldavian, Moldovan",Română,ro,"The identifiers mo and mol are deprecated, leaving ro and ron (639-2/T) and rum (639-2/B) the current language identifiers to be used for the variant of the Romanian language also known as Moldavian and Moldovan in English and moldave in French. The identifiers mo and mol will not be assigned to different items, and recordings using these identifiers will not be invalid." | ||
Romansh,Rumantsch Grischun,rm, | ||
Rundi,Ikirundi,rn,also known as Kirundi | ||
Russian,русский,ru, | ||
Samoan,gagana fa'a Samoa,sm, | ||
Sango,yângâ tî sängö,sg, | ||
Sanskrit,संस्कृतम्,sa,"ancient, still spoken, also known as Saṃskṛta" | ||
Sardinian,sardu,sc,macrolanguage | ||
Serbian,српски језик,sr,The ISO 639-2/T code srp deprecated the ISO 639-2/B code scc | ||
Shona,chiShona,sn, | ||
"Sichuan Yi, Nuosu",ꆈꌠ꒿ Nuosuhxop,ii,Standard form of Yi languages | ||
Sindhi,"सिन्धी, سنڌي، سندھی",sd, | ||
"Sinhala, Sinhalese",සිංහල,si, | ||
Slovak,"Slovenčina, Slovenský Jazyk",sk, | ||
Slovenian,"Slovenski Jezik, Slovenščina",sl,also known as Slovene | ||
Somali,"Soomaaliga, af Soomaali",so, | ||
South Ndebele,isiNdebele,nr,also known as Southern Ndebele | ||
Southern Sotho,Sesotho,st, | ||
"Spanish, Castilian",Español,es, | ||
Sundanese,Basa Sunda,su, | ||
Swahili,Kiswahili,sw,macrolanguage | ||
Swati,SiSwati,ss,also known as Swazi | ||
Swedish,Svenska,sv, | ||
Tagalog,Wikang Tagalog,tl,Note: Filipino (Pilipino) has the code [fil] | ||
Tahitian,Reo Tahiti,ty,One of the Reo Mā`ohi (languages of French Polynesia) | ||
Tajik,"тоҷикӣ, toçikī, تاجیکی",tg, | ||
Tamil,தமிழ்,ta, | ||
Tatar,"татар теле, tatar tele",tt, | ||
Telugu,తెలుగు,te, | ||
Thai,ไทย,th, | ||
Tibetan,བོད་ཡིག,bo,also known as Standard Tibetan | ||
Tigrinya,ትግርኛ,ti, | ||
Tonga (Tonga Islands),Faka Tonga,to, | ||
Tsonga,Xitsonga,ts, | ||
Tswana,Setswana,tn, | ||
Turkish,Türkçe,tr, | ||
Turkmen,"Türkmen, Түркмен",tk, | ||
Twi,Twi,tw,Covered by macrolanguage [ak/aka] | ||
"Uighur, Uyghur","ئۇيغۇرچە, Uyghurche",ug, | ||
Ukrainian,Українська,uk, | ||
Urdu,اردو,ur, | ||
Uzbek,"Oʻzbek, Ўзбек, أۇزبېك",uz,macrolanguage | ||
Venda,Tshivenḓa,ve, | ||
Vietnamese,Tiếng Việt,vi, | ||
Volapük,Volapük,vo,constructed | ||
Walloon,Walon,wa, | ||
Welsh,Cymraeg,cy, | ||
Western Frisian,Frysk,fy,also known as Frisian | ||
Wolof,Wollof,wo, | ||
Xhosa,isiXhosa,xh, | ||
Yiddish,ייִדיש,yi,macrolanguage | ||
Yoruba,Yorùbá,yo, | ||
"Zhuang, Chuang","Saɯ cueŋƅ, Saw cuengh",za,macrolanguage | ||
Zulu,isiZulu,zu, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
{ | ||
"profile": "tabular-data-package", | ||
"name": "iso-639-1-language-codes", | ||
"languages": ["en", "es"], | ||
"title": { | ||
"": "ISO 639-1 Language Codes", | ||
"es": "ISO 639-1 Códigos de idioma" | ||
}, | ||
"description": { | ||
"": "ISO 639-1 two-letter language codes", | ||
"es": "ISO 639-1 códigos de idioma de dos letras" | ||
}, | ||
"version": "0.1.0", | ||
"keywords": { | ||
"": "language", | ||
"es": "idioma" | ||
}, | ||
"licenses": [{ | ||
"title": { | ||
"": "Creative Commons Attribution Share-Alike 3.0", | ||
"es": "Reconocimiento-CompartirIgual 3.0 España" | ||
}, | ||
"path": { | ||
"": "https://creativecommons.org/licenses/by-sa/3.0/", | ||
"es": "https://creativecommons.org/licenses/by-sa/3.0/es/" | ||
} | ||
}], | ||
"contributors": [{ | ||
"title": "Joe Bloggs", | ||
"email": "joe@bloggs.com", | ||
"path": "http://www.bloggs.com", | ||
"role": { | ||
"": "author", | ||
"es": "autor" | ||
}, | ||
"organization": { | ||
"": "International Organization for Standardization", | ||
"es": "Organización Internacional para la Estandarización" | ||
} | ||
}], | ||
"resources": [{ | ||
"profile": "tabular-data-resource", | ||
"path": "data/ISO-639-1-codes.csv", | ||
"name": "iso-639-1-codes", | ||
"title": { | ||
"": "ISO 639-1 language codes", | ||
"es": "ISO 639-1 códigos de idioma" | ||
}, | ||
"description": { | ||
"": "ISO 639-1: two-letter language codes", | ||
"es": "ISO 639-1 códigos de idioma de dos letras" | ||
}, | ||
"encoding": "utf-8", | ||
"format": "csv", | ||
"mediatype": "text/csv", | ||
"sources": [{ | ||
"title": "List of ISO 639-1 codes", | ||
"path": "https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes" | ||
}], | ||
"schema": { | ||
"fields": [{ | ||
"name": "ISO language name", | ||
"title": { | ||
"": "ISO language name", | ||
"es": "ISO Nombre del lenguaje" | ||
}, | ||
"type": "string", | ||
"format": "default", | ||
"constraints": { | ||
"required": true | ||
} | ||
}, | ||
{ | ||
"name": "Native name (endonym)", | ||
"title": { | ||
"": "Native name (endonym)", | ||
"es": "Nombre nativo (endónimo)" | ||
}, | ||
"type": "string", | ||
"format": "default" | ||
}, | ||
{ | ||
"name": "639-1", | ||
"type": "string", | ||
"format": "default" | ||
}, | ||
{ | ||
"name": "Notes", | ||
"title": { | ||
"": "Notes", | ||
"es": "Notas" | ||
}, | ||
"type": "string", | ||
"format": "default" | ||
} | ||
], | ||
"missingValues": [ | ||
"" | ||
], | ||
"primaryKey": [ | ||
"639-1" | ||
] | ||
}, | ||
"dialect": { | ||
"delimiter": ",", | ||
"quoteChar": "\"", | ||
"header": true, | ||
"doubleQuote": true, | ||
"lineTerminator": "\r\n", | ||
"skipInitialSpace": true, | ||
"caseSensitiveHeader": false | ||
} | ||
}] | ||
} |