On 10/12/20 5:11 AM, Florian Weimer wrote:
Store the locales list as a Lua table in the spec file. Add Lua
code and a new Python script, parse-SUPPORTED.py, to compute
a common representation from it.
This is in response to this bug:
<https://bugzilla.redhat.com/show_bug.cgi?id=1887097>
The semantics of the processing we do have changed slightly with this
patch. After the patch we ignore charset, which is perfectly acceptable
because what we're trying to do is watch out for entirely new language
and region entries that might create new subpackages and decide what to
do in those cases (likewise remove a language or region). The sorting
of the data table has moved into parse-SUPPORTED.py where we instead sort
the glibc SUPPORTED data we read into the set (no guarantee SUPPORTED is
sorted, and previously we were comparing spec SUPPORTED to glibc SUPPORTED,
one copied from the other). The determinism of the language pack generation
is guaranteed by the data table order so we don't need sorting there either.
Overall this looks like it should solve 1887097, and if the parser can't
run parse-SUPPORTED.py it doesn't matter since that is no longer required
to create a fully parsed spec file.
OK for rawhide.
Reviewed-by: Carlos O'Donell <carlos(a)redhat.com>
---
SUPPORTED | 496 -----------------------------------------------------
convnames.py | 18 --
glibc.spec | 418 ++++++++++++++++++++++++++++++++++----------
parse-SUPPORTED.py | 40 +++++
4 files changed, 366 insertions(+), 606 deletions(-)
diff --git a/SUPPORTED b/SUPPORTED
deleted file mode 100644
index cd785d7..0000000
--- a/SUPPORTED
+++ /dev/null
@@ -1,496 +0,0 @@
-# This file names the currently supported and somewhat tested locales.
-# If you have any additions please file a glibc bug report.
-SUPPORTED-LOCALES=\
-C.UTF-8/UTF-8 \
-aa_DJ.UTF-8/UTF-8 \
-aa_DJ/ISO-8859-1 \
-aa_ER/UTF-8 \
-aa_ER@saaho/UTF-8 \
-aa_ET/UTF-8 \
-af_ZA.UTF-8/UTF-8 \
-af_ZA/ISO-8859-1 \
-agr_PE/UTF-8 \
-ak_GH/UTF-8 \
-am_ET/UTF-8 \
-an_ES.UTF-8/UTF-8 \
-an_ES/ISO-8859-15 \
-anp_IN/UTF-8 \
-ar_AE.UTF-8/UTF-8 \
-ar_AE/ISO-8859-6 \
-ar_BH.UTF-8/UTF-8 \
-ar_BH/ISO-8859-6 \
-ar_DZ.UTF-8/UTF-8 \
-ar_DZ/ISO-8859-6 \
-ar_EG.UTF-8/UTF-8 \
-ar_EG/ISO-8859-6 \
-ar_IN/UTF-8 \
-ar_IQ.UTF-8/UTF-8 \
-ar_IQ/ISO-8859-6 \
-ar_JO.UTF-8/UTF-8 \
-ar_JO/ISO-8859-6 \
-ar_KW.UTF-8/UTF-8 \
-ar_KW/ISO-8859-6 \
-ar_LB.UTF-8/UTF-8 \
-ar_LB/ISO-8859-6 \
-ar_LY.UTF-8/UTF-8 \
-ar_LY/ISO-8859-6 \
-ar_MA.UTF-8/UTF-8 \
-ar_MA/ISO-8859-6 \
-ar_OM.UTF-8/UTF-8 \
-ar_OM/ISO-8859-6 \
-ar_QA.UTF-8/UTF-8 \
-ar_QA/ISO-8859-6 \
-ar_SA.UTF-8/UTF-8 \
-ar_SA/ISO-8859-6 \
-ar_SD.UTF-8/UTF-8 \
-ar_SD/ISO-8859-6 \
-ar_SS/UTF-8 \
-ar_SY.UTF-8/UTF-8 \
-ar_SY/ISO-8859-6 \
-ar_TN.UTF-8/UTF-8 \
-ar_TN/ISO-8859-6 \
-ar_YE.UTF-8/UTF-8 \
-ar_YE/ISO-8859-6 \
-ayc_PE/UTF-8 \
-az_AZ/UTF-8 \
-az_IR/UTF-8 \
-as_IN/UTF-8 \
-ast_ES.UTF-8/UTF-8 \
-ast_ES/ISO-8859-15 \
-be_BY.UTF-8/UTF-8 \
-be_BY/CP1251 \
-be_BY@latin/UTF-8 \
-bem_ZM/UTF-8 \
-ber_DZ/UTF-8 \
-ber_MA/UTF-8 \
-bg_BG.UTF-8/UTF-8 \
-bg_BG/CP1251 \
-bhb_IN.UTF-8/UTF-8 \
-bho_IN/UTF-8 \
-bho_NP/UTF-8 \
-bi_VU/UTF-8 \
-bn_BD/UTF-8 \
-bn_IN/UTF-8 \
-bo_CN/UTF-8 \
-bo_IN/UTF-8 \
-br_FR.UTF-8/UTF-8 \
-br_FR/ISO-8859-1 \
-br_FR@euro/ISO-8859-15 \
-brx_IN/UTF-8 \
-bs_BA.UTF-8/UTF-8 \
-bs_BA/ISO-8859-2 \
-byn_ER/UTF-8 \
-ca_AD.UTF-8/UTF-8 \
-ca_AD/ISO-8859-15 \
-ca_ES.UTF-8/UTF-8 \
-ca_ES/ISO-8859-1 \
-ca_ES@euro/ISO-8859-15 \
-ca_ES@valencia/UTF-8 \
-ca_FR.UTF-8/UTF-8 \
-ca_FR/ISO-8859-15 \
-ca_IT.UTF-8/UTF-8 \
-ca_IT/ISO-8859-15 \
-ce_RU/UTF-8 \
-chr_US/UTF-8 \
-ckb_IQ/UTF-8 \
-cmn_TW/UTF-8 \
-crh_UA/UTF-8 \
-cs_CZ.UTF-8/UTF-8 \
-cs_CZ/ISO-8859-2 \
-csb_PL/UTF-8 \
-cv_RU/UTF-8 \
-cy_GB.UTF-8/UTF-8 \
-cy_GB/ISO-8859-14 \
-da_DK.UTF-8/UTF-8 \
-da_DK/ISO-8859-1 \
-da_DK.ISO-8859-15/ISO-8859-15 \
-de_AT.UTF-8/UTF-8 \
-de_AT/ISO-8859-1 \
-de_AT@euro/ISO-8859-15 \
-de_BE.UTF-8/UTF-8 \
-de_BE/ISO-8859-1 \
-de_BE@euro/ISO-8859-15 \
-de_CH.UTF-8/UTF-8 \
-de_CH/ISO-8859-1 \
-de_DE.UTF-8/UTF-8 \
-de_DE/ISO-8859-1 \
-de_DE@euro/ISO-8859-15 \
-de_IT.UTF-8/UTF-8 \
-de_IT/ISO-8859-1 \
-de_LI.UTF-8/UTF-8 \
-de_LU.UTF-8/UTF-8 \
-de_LU/ISO-8859-1 \
-de_LU@euro/ISO-8859-15 \
-doi_IN/UTF-8 \
-dsb_DE/UTF-8 \
-dv_MV/UTF-8 \
-dz_BT/UTF-8 \
-el_GR.UTF-8/UTF-8 \
-el_GR/ISO-8859-7 \
-el_GR@euro/ISO-8859-7 \
-el_CY.UTF-8/UTF-8 \
-el_CY/ISO-8859-7 \
-en_AG/UTF-8 \
-en_AU.UTF-8/UTF-8 \
-en_AU/ISO-8859-1 \
-en_BW.UTF-8/UTF-8 \
-en_BW/ISO-8859-1 \
-en_CA.UTF-8/UTF-8 \
-en_CA/ISO-8859-1 \
-en_DK.UTF-8/UTF-8 \
-en_DK/ISO-8859-1 \
-en_GB.UTF-8/UTF-8 \
-en_GB/ISO-8859-1 \
-en_GB.ISO-8859-15/ISO-8859-15 \
-en_HK.UTF-8/UTF-8 \
-en_HK/ISO-8859-1 \
-en_IE.UTF-8/UTF-8 \
-en_IE/ISO-8859-1 \
-en_IE@euro/ISO-8859-15 \
-en_IL/UTF-8 \
-en_IN/UTF-8 \
-en_NG/UTF-8 \
-en_NZ.UTF-8/UTF-8 \
-en_NZ/ISO-8859-1 \
-en_PH.UTF-8/UTF-8 \
-en_PH/ISO-8859-1 \
-en_SC.UTF-8/UTF-8 \
-en_SG.UTF-8/UTF-8 \
-en_SG/ISO-8859-1 \
-en_US.UTF-8/UTF-8 \
-en_US/ISO-8859-1 \
-en_US.ISO-8859-15/ISO-8859-15 \
-en_ZA.UTF-8/UTF-8 \
-en_ZA/ISO-8859-1 \
-en_ZM/UTF-8 \
-en_ZW.UTF-8/UTF-8 \
-en_ZW/ISO-8859-1 \
-eo/UTF-8 \
-es_AR.UTF-8/UTF-8 \
-es_AR/ISO-8859-1 \
-es_BO.UTF-8/UTF-8 \
-es_BO/ISO-8859-1 \
-es_CL.UTF-8/UTF-8 \
-es_CL/ISO-8859-1 \
-es_CO.UTF-8/UTF-8 \
-es_CO/ISO-8859-1 \
-es_CR.UTF-8/UTF-8 \
-es_CR/ISO-8859-1 \
-es_CU/UTF-8 \
-es_DO.UTF-8/UTF-8 \
-es_DO/ISO-8859-1 \
-es_EC.UTF-8/UTF-8 \
-es_EC/ISO-8859-1 \
-es_ES.UTF-8/UTF-8 \
-es_ES/ISO-8859-1 \
-es_ES@euro/ISO-8859-15 \
-es_GT.UTF-8/UTF-8 \
-es_GT/ISO-8859-1 \
-es_HN.UTF-8/UTF-8 \
-es_HN/ISO-8859-1 \
-es_MX.UTF-8/UTF-8 \
-es_MX/ISO-8859-1 \
-es_NI.UTF-8/UTF-8 \
-es_NI/ISO-8859-1 \
-es_PA.UTF-8/UTF-8 \
-es_PA/ISO-8859-1 \
-es_PE.UTF-8/UTF-8 \
-es_PE/ISO-8859-1 \
-es_PR.UTF-8/UTF-8 \
-es_PR/ISO-8859-1 \
-es_PY.UTF-8/UTF-8 \
-es_PY/ISO-8859-1 \
-es_SV.UTF-8/UTF-8 \
-es_SV/ISO-8859-1 \
-es_US.UTF-8/UTF-8 \
-es_US/ISO-8859-1 \
-es_UY.UTF-8/UTF-8 \
-es_UY/ISO-8859-1 \
-es_VE.UTF-8/UTF-8 \
-es_VE/ISO-8859-1 \
-et_EE.UTF-8/UTF-8 \
-et_EE/ISO-8859-1 \
-et_EE.ISO-8859-15/ISO-8859-15 \
-eu_ES.UTF-8/UTF-8 \
-eu_ES/ISO-8859-1 \
-eu_ES@euro/ISO-8859-15 \
-fa_IR/UTF-8 \
-ff_SN/UTF-8 \
-fi_FI.UTF-8/UTF-8 \
-fi_FI/ISO-8859-1 \
-fi_FI@euro/ISO-8859-15 \
-fil_PH/UTF-8 \
-fo_FO.UTF-8/UTF-8 \
-fo_FO/ISO-8859-1 \
-fr_BE.UTF-8/UTF-8 \
-fr_BE/ISO-8859-1 \
-fr_BE@euro/ISO-8859-15 \
-fr_CA.UTF-8/UTF-8 \
-fr_CA/ISO-8859-1 \
-fr_CH.UTF-8/UTF-8 \
-fr_CH/ISO-8859-1 \
-fr_FR.UTF-8/UTF-8 \
-fr_FR/ISO-8859-1 \
-fr_FR@euro/ISO-8859-15 \
-fr_LU.UTF-8/UTF-8 \
-fr_LU/ISO-8859-1 \
-fr_LU@euro/ISO-8859-15 \
-fur_IT/UTF-8 \
-fy_NL/UTF-8 \
-fy_DE/UTF-8 \
-ga_IE.UTF-8/UTF-8 \
-ga_IE/ISO-8859-1 \
-ga_IE@euro/ISO-8859-15 \
-gd_GB.UTF-8/UTF-8 \
-gd_GB/ISO-8859-15 \
-gez_ER/UTF-8 \
-gez_ER@abegede/UTF-8 \
-gez_ET/UTF-8 \
-gez_ET@abegede/UTF-8 \
-gl_ES.UTF-8/UTF-8 \
-gl_ES/ISO-8859-1 \
-gl_ES@euro/ISO-8859-15 \
-gu_IN/UTF-8 \
-gv_GB.UTF-8/UTF-8 \
-gv_GB/ISO-8859-1 \
-ha_NG/UTF-8 \
-hak_TW/UTF-8 \
-he_IL.UTF-8/UTF-8 \
-he_IL/ISO-8859-8 \
-hi_IN/UTF-8 \
-hif_FJ/UTF-8 \
-hne_IN/UTF-8 \
-hr_HR.UTF-8/UTF-8 \
-hr_HR/ISO-8859-2 \
-hsb_DE/ISO-8859-2 \
-hsb_DE.UTF-8/UTF-8 \
-ht_HT/UTF-8 \
-hu_HU.UTF-8/UTF-8 \
-hu_HU/ISO-8859-2 \
-hy_AM/UTF-8 \
-hy_AM.ARMSCII-8/ARMSCII-8 \
-ia_FR/UTF-8 \
-id_ID.UTF-8/UTF-8 \
-id_ID/ISO-8859-1 \
-ig_NG/UTF-8 \
-ik_CA/UTF-8 \
-is_IS.UTF-8/UTF-8 \
-is_IS/ISO-8859-1 \
-it_CH.UTF-8/UTF-8 \
-it_CH/ISO-8859-1 \
-it_IT.UTF-8/UTF-8 \
-it_IT/ISO-8859-1 \
-it_IT@euro/ISO-8859-15 \
-iu_CA/UTF-8 \
-ja_JP.EUC-JP/EUC-JP \
-ja_JP.UTF-8/UTF-8 \
-ka_GE.UTF-8/UTF-8 \
-ka_GE/GEORGIAN-PS \
-kab_DZ/UTF-8 \
-kk_KZ.UTF-8/UTF-8 \
-kk_KZ/PT154 \
-kl_GL.UTF-8/UTF-8 \
-kl_GL/ISO-8859-1 \
-km_KH/UTF-8 \
-kn_IN/UTF-8 \
-ko_KR.EUC-KR/EUC-KR \
-ko_KR.UTF-8/UTF-8 \
-kok_IN/UTF-8 \
-ks_IN/UTF-8 \
-ks_IN@devanagari/UTF-8 \
-ku_TR.UTF-8/UTF-8 \
-ku_TR/ISO-8859-9 \
-kw_GB.UTF-8/UTF-8 \
-kw_GB/ISO-8859-1 \
-ky_KG/UTF-8 \
-lb_LU/UTF-8 \
-lg_UG.UTF-8/UTF-8 \
-lg_UG/ISO-8859-10 \
-li_BE/UTF-8 \
-li_NL/UTF-8 \
-lij_IT/UTF-8 \
-ln_CD/UTF-8 \
-lo_LA/UTF-8 \
-lt_LT.UTF-8/UTF-8 \
-lt_LT/ISO-8859-13 \
-lv_LV.UTF-8/UTF-8 \
-lv_LV/ISO-8859-13 \
-lzh_TW/UTF-8 \
-mag_IN/UTF-8 \
-mai_IN/UTF-8 \
-mai_NP/UTF-8 \
-mfe_MU/UTF-8 \
-mg_MG.UTF-8/UTF-8 \
-mg_MG/ISO-8859-15 \
-mhr_RU/UTF-8 \
-mi_NZ.UTF-8/UTF-8 \
-mi_NZ/ISO-8859-13 \
-miq_NI/UTF-8 \
-mjw_IN/UTF-8 \
-mk_MK.UTF-8/UTF-8 \
-mk_MK/ISO-8859-5 \
-ml_IN/UTF-8 \
-mn_MN/UTF-8 \
-mni_IN/UTF-8 \
-mnw_MM/UTF-8 \
-mr_IN/UTF-8 \
-ms_MY.UTF-8/UTF-8 \
-ms_MY/ISO-8859-1 \
-mt_MT.UTF-8/UTF-8 \
-mt_MT/ISO-8859-3 \
-my_MM/UTF-8 \
-nan_TW/UTF-8 \
-nan_TW@latin/UTF-8 \
-nb_NO.UTF-8/UTF-8 \
-nb_NO/ISO-8859-1 \
-nds_DE/UTF-8 \
-nds_NL/UTF-8 \
-ne_NP/UTF-8 \
-nhn_MX/UTF-8 \
-niu_NU/UTF-8 \
-niu_NZ/UTF-8 \
-nl_AW/UTF-8 \
-nl_BE.UTF-8/UTF-8 \
-nl_BE/ISO-8859-1 \
-nl_BE@euro/ISO-8859-15 \
-nl_NL.UTF-8/UTF-8 \
-nl_NL/ISO-8859-1 \
-nl_NL@euro/ISO-8859-15 \
-nn_NO.UTF-8/UTF-8 \
-nn_NO/ISO-8859-1 \
-nr_ZA/UTF-8 \
-nso_ZA/UTF-8 \
-oc_FR.UTF-8/UTF-8 \
-oc_FR/ISO-8859-1 \
-om_ET/UTF-8 \
-om_KE.UTF-8/UTF-8 \
-om_KE/ISO-8859-1 \
-or_IN/UTF-8 \
-os_RU/UTF-8 \
-pa_IN/UTF-8 \
-pa_PK/UTF-8 \
-pap_AW/UTF-8 \
-pap_CW/UTF-8 \
-pl_PL.UTF-8/UTF-8 \
-pl_PL/ISO-8859-2 \
-ps_AF/UTF-8 \
-pt_BR.UTF-8/UTF-8 \
-pt_BR/ISO-8859-1 \
-pt_PT.UTF-8/UTF-8 \
-pt_PT/ISO-8859-1 \
-pt_PT@euro/ISO-8859-15 \
-quz_PE/UTF-8 \
-raj_IN/UTF-8 \
-ro_RO.UTF-8/UTF-8 \
-ro_RO/ISO-8859-2 \
-ru_RU.KOI8-R/KOI8-R \
-ru_RU.UTF-8/UTF-8 \
-ru_RU/ISO-8859-5 \
-ru_UA.UTF-8/UTF-8 \
-ru_UA/KOI8-U \
-rw_RW/UTF-8 \
-sa_IN/UTF-8 \
-sah_RU/UTF-8 \
-sat_IN/UTF-8 \
-sc_IT/UTF-8 \
-sd_IN/UTF-8 \
-sd_IN@devanagari/UTF-8 \
-se_NO/UTF-8 \
-sgs_LT/UTF-8 \
-shn_MM/UTF-8 \
-shs_CA/UTF-8 \
-si_LK/UTF-8 \
-sid_ET/UTF-8 \
-sk_SK.UTF-8/UTF-8 \
-sk_SK/ISO-8859-2 \
-sl_SI.UTF-8/UTF-8 \
-sl_SI/ISO-8859-2 \
-sm_WS/UTF-8 \
-so_DJ.UTF-8/UTF-8 \
-so_DJ/ISO-8859-1 \
-so_ET/UTF-8 \
-so_KE.UTF-8/UTF-8 \
-so_KE/ISO-8859-1 \
-so_SO.UTF-8/UTF-8 \
-so_SO/ISO-8859-1 \
-sq_AL.UTF-8/UTF-8 \
-sq_AL/ISO-8859-1 \
-sq_MK/UTF-8 \
-sr_ME/UTF-8 \
-sr_RS/UTF-8 \
-sr_RS@latin/UTF-8 \
-ss_ZA/UTF-8 \
-st_ZA.UTF-8/UTF-8 \
-st_ZA/ISO-8859-1 \
-sv_FI.UTF-8/UTF-8 \
-sv_FI/ISO-8859-1 \
-sv_FI@euro/ISO-8859-15 \
-sv_SE.UTF-8/UTF-8 \
-sv_SE/ISO-8859-1 \
-sv_SE.ISO-8859-15/ISO-8859-15 \
-sw_KE/UTF-8 \
-sw_TZ/UTF-8 \
-szl_PL/UTF-8 \
-ta_IN/UTF-8 \
-ta_LK/UTF-8 \
-tcy_IN.UTF-8/UTF-8 \
-te_IN/UTF-8 \
-tg_TJ.UTF-8/UTF-8 \
-tg_TJ/KOI8-T \
-th_TH.UTF-8/UTF-8 \
-th_TH/TIS-620 \
-the_NP/UTF-8 \
-ti_ER/UTF-8 \
-ti_ET/UTF-8 \
-tig_ER/UTF-8 \
-tk_TM/UTF-8 \
-tl_PH.UTF-8/UTF-8 \
-tl_PH/ISO-8859-1 \
-tn_ZA/UTF-8 \
-to_TO/UTF-8 \
-tpi_PG/UTF-8 \
-tr_CY.UTF-8/UTF-8 \
-tr_CY/ISO-8859-9 \
-tr_TR.UTF-8/UTF-8 \
-tr_TR/ISO-8859-9 \
-ts_ZA/UTF-8 \
-tt_RU/UTF-8 \
-tt_RU@iqtelif/UTF-8 \
-ug_CN/UTF-8 \
-uk_UA.UTF-8/UTF-8 \
-uk_UA/KOI8-U \
-unm_US/UTF-8 \
-ur_IN/UTF-8 \
-ur_PK/UTF-8 \
-uz_UZ.UTF-8/UTF-8 \
-uz_UZ/ISO-8859-1 \
-uz_UZ@cyrillic/UTF-8 \
-ve_ZA/UTF-8 \
-vi_VN/UTF-8 \
-wa_BE/ISO-8859-1 \
-wa_BE@euro/ISO-8859-15 \
-wa_BE.UTF-8/UTF-8 \
-wae_CH/UTF-8 \
-wal_ET/UTF-8 \
-wo_SN/UTF-8 \
-xh_ZA.UTF-8/UTF-8 \
-xh_ZA/ISO-8859-1 \
-yi_US.UTF-8/UTF-8 \
-yi_US/CP1255 \
-yo_NG/UTF-8 \
-yue_HK/UTF-8 \
-yuw_PG/UTF-8 \
-zh_CN.GB18030/GB18030 \
-zh_CN.GBK/GBK \
-zh_CN.UTF-8/UTF-8 \
-zh_CN/GB2312 \
-zh_HK.UTF-8/UTF-8 \
-zh_HK/BIG5-HKSCS \
-zh_SG.UTF-8/UTF-8 \
-zh_SG.GBK/GBK \
-zh_SG/GB2312 \
-zh_TW.EUC-TW/EUC-TW \
-zh_TW.UTF-8/UTF-8 \
-zh_TW/BIG5 \
-zu_ZA.UTF-8/UTF-8 \
-zu_ZA/ISO-8859-1 \
OK, remove SUPPORTED from the dist-git directory because we want to make
the spec file self-contained.
diff --git a/convnames.py b/convnames.py
deleted file mode 100755
index 4a93041..0000000
--- a/convnames.py
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/usr/bin/python3
-# This code is called by glibc.spec via lua to generate the mapping
-# from language code to language name. The code uses langtable to
-# do the mapping. The information in langtable is a harmonization
-# of CLDR and glibc lang_name data.
-import sys
-try:
- import langtable
-except ImportError:
- # if the import fails, don't translate anything
- langtable = None
-
-for lang in sys.argv[1:]:
- if langtable:
- name = langtable.language_name(languageId=lang, languageIdQuery='en')
- print(name or lang)
- else:
- print(lang)
OK. Remove the name converter which uses langtable maintained by Mike Fabian.
diff --git a/glibc.spec b/glibc.spec
index 67a85e2..c622b88 100644
--- a/glibc.spec
+++ b/glibc.spec
@@ -130,20 +130,9 @@ Source0: %{?glibc_release_url}%{glibcsrcdir}.tar.xz
Source1: nscd.conf
Source2: bench.mk
Source3: glibc-bench-compare
-# A copy of localedata/SUPPORTED in the Source0 tarball. The
-# SUPPORTED file is used below to generate the list of locale
-# packages, using a Lua snippet.
-# When the upstream SUPPORTED is out of sync with our copy, the
-# prep phase will fail and you will need to update the local
-# copy.
-Source11: SUPPORTED
OK. Removed.
+Source11: parse-SUPPORTED.py
OK. New.
# Include in the source RPM for reference.
Source12: ChangeLog.old
-# Provide ISO language code to name translation using Python's
-# langtable. The langtable data is maintained by the Fedora
-# i18n team and is a harmonization of CLDR and glibc lang_name
-# data in a more accessible API (also used by Anaconda).
-Source13: convnames.py
OK. Removed.
##############################################################################
# Patches:
@@ -238,7 +227,6 @@ BuildRequires: systemd
# distributions, python3 does not actually install /usr/bin/python3,
# so we also depend on python3-devel.
BuildRequires: python3 python3-devel
-BuildRequires: python3dist(langtable)
OK. No longer needed since we only use it to update the table by hand at conflict time.
# This GCC version is needed for -fstack-clash-protection support.
BuildRequires: gcc >= 7.2.1-6
@@ -432,84 +420,331 @@ If you are building custom locales you will most likely use
these sources as the basis for your new locale.
%{lua:
--- Array of languages (ISO-639 codes).
-local languages = {}
--- Dictionary from language codes (as in the languages array) to arrays
--- of regions.
-local supplements = {}
-do
- -- Parse the SUPPORTED file. Eliminate duplicates.
- local lang_region_seen = {}
- for line in io.lines(rpm.expand("%{SOURCE11}")) do
- -- Match lines which contain a language (eo) or language/region
- -- (en_US) strings.
- local lang_region = string.match(line, "^([a-z][^/@.]+)")
- if lang_region ~= nil then
- if lang_region_seen[lang_region] == nil then
- lang_region_seen[lang_region] = true
+-- To make lua-mode hapyy: '
- -- Split language/region pair.
- local lang, region = string.match(lang_region, "^(.+)_(.+)")
- if lang == nil then
- -- Region is missing, use only the language.
- lang = lang_region
- end
- local suppl = supplements[lang]
- if suppl == nil then
- suppl = {}
- supplements[lang] = suppl
- -- New language not seen before.
- languages[#languages + 1] = lang
- end
- if region ~= nil then
- -- New region because of the check against
- -- lang_region_seen above.
- suppl[#suppl + 1] = region
- end
+-- List of supported locales. This is used to generate the langpack
+-- subpackages below. This table needs adjustments if the set of
+-- glibc locales changes. "code" is the glibc code for the language
+-- (before the "_". "name" is the English translation of the
language
+-- name (for use in subpackage descriptions). "regions" is a table of
+-- variant specifiers (after the "_", excluding "@" and
"."
+-- variants/charset specifiers). The table must be sorted by the code
+-- field, and the regions table must be sorted as well.
OK. Call out the sorting requirement.
+--
+-- English translations of language names can be obtained using (for
+-- the "aa" language in this example):
+--
+-- python3 -c 'import langtable; print(langtable.language_name("aa",
languageIdQuery="en"))'
OK.
+
+local locales = {
+ { code="aa", name="Afar", regions={ "DJ",
"ER", "ET" } },
+ { code="af", name="Afrikaans", regions={ "ZA" } },
+ { code="agr", name="Aguaruna", regions={ "PE" } },
+ { code="ak", name="Akan", regions={ "GH" } },
+ { code="am", name="Amharic", regions={ "ET" } },
+ { code="an", name="Aragonese", regions={ "ES" } },
+ { code="anp", name="Angika", regions={ "IN" } },
+ {
+ code="ar",
+ name="Arabic",
+ regions={
+ "AE",
+ "BH",
+ "DZ",
+ "EG",
+ "IN",
+ "IQ",
+ "JO",
+ "KW",
+ "LB",
+ "LY",
+ "MA",
+ "OM",
+ "QA",
+ "SA",
+ "SD",
+ "SS",
+ "SY",
+ "TN",
+ "YE"
+ }
+ },
+ { code="as", name="Assamese", regions={ "IN" } },
+ { code="ast", name="Asturian", regions={ "ES" } },
+ { code="ayc", name="Southern Aymara", regions={ "PE" }
},
+ { code="az", name="Azerbaijani", regions={ "AZ",
"IR" } },
+ { code="be", name="Belarusian", regions={ "BY" } },
+ { code="bem", name="Bemba", regions={ "ZM" } },
+ { code="ber", name="Berber", regions={ "DZ",
"MA" } },
+ { code="bg", name="Bulgarian", regions={ "BG" } },
+ { code="bhb", name="Bhili", regions={ "IN" } },
+ { code="bho", name="Bhojpuri", regions={ "IN",
"NP" } },
+ { code="bi", name="Bislama", regions={ "VU" } },
+ { code="bn", name="Bangla", regions={ "BD",
"IN" } },
+ { code="bo", name="Tibetan", regions={ "CN",
"IN" } },
+ { code="br", name="Breton", regions={ "FR" } },
+ { code="brx", name="Bodo", regions={ "IN" } },
+ { code="bs", name="Bosnian", regions={ "BA" } },
+ { code="byn", name="Blin", regions={ "ER" } },
+ { code="ca", name="Catalan", regions={ "AD",
"ES", "FR", "IT" } },
+ { code="ce", name="Chechen", regions={ "RU" } },
+ { code="chr", name="Cherokee", regions={ "US" } },
+ { code="ckb", name="Central Kurdish", regions={ "IQ" }
},
+ { code="cmn", name="Mandarin Chinese", regions={ "TW" }
},
+ { code="crh", name="Crimean Turkish", regions={ "UA" }
},
+ { code="cs", name="Czech", regions={ "CZ" } },
+ { code="csb", name="Kashubian", regions={ "PL" } },
+ { code="cv", name="Chuvash", regions={ "RU" } },
+ { code="cy", name="Welsh", regions={ "GB" } },
+ { code="da", name="Danish", regions={ "DK" } },
+ {
+ code="de",
+ name="German",
+ regions={ "AT", "BE", "CH", "DE",
"IT", "LI", "LU" }
+ },
+ { code="doi", name="Dogri", regions={ "IN" } },
+ { code="dsb", name="Lower Sorbian", regions={ "DE" } },
+ { code="dv", name="Divehi", regions={ "MV" } },
+ { code="dz", name="Dzongkha", regions={ "BT" } },
+ { code="el", name="Greek", regions={ "CY",
"GR" } },
+ {
+ code="en",
+ name="English",
+ regions={
+ "AG",
+ "AU",
+ "BW",
+ "CA",
+ "DK",
+ "GB",
+ "HK",
+ "IE",
+ "IL",
+ "IN",
+ "NG",
+ "NZ",
+ "PH",
+ "SC",
+ "SG",
+ "US",
+ "ZA",
+ "ZM",
+ "ZW"
+ }
+ },
+ { code="eo", name="Esperanto", regions={} },
+ {
+ code="es",
+ name="Spanish",
+ regions={
+ "AR",
+ "BO",
+ "CL",
+ "CO",
+ "CR",
+ "CU",
+ "DO",
+ "EC",
+ "ES",
+ "GT",
+ "HN",
+ "MX",
+ "NI",
+ "PA",
+ "PE",
+ "PR",
+ "PY",
+ "SV",
+ "US",
+ "UY",
+ "VE"
+ }
+ },
+ { code="et", name="Estonian", regions={ "EE" } },
+ { code="eu", name="Basque", regions={ "ES" } },
+ { code="fa", name="Persian", regions={ "IR" } },
+ { code="ff", name="Fulah", regions={ "SN" } },
+ { code="fi", name="Finnish", regions={ "FI" } },
+ { code="fil", name="Filipino", regions={ "PH" } },
+ { code="fo", name="Faroese", regions={ "FO" } },
+ { code="fr", name="French", regions={ "BE",
"CA", "CH", "FR", "LU" } },
+ { code="fur", name="Friulian", regions={ "IT" } },
+ { code="fy", name="Western Frisian", regions={ "DE",
"NL" } },
+ { code="ga", name="Irish", regions={ "IE" } },
+ { code="gd", name="Scottish Gaelic", regions={ "GB" }
},
+ { code="gez", name="Geez", regions={ "ER",
"ET" } },
+ { code="gl", name="Galician", regions={ "ES" } },
+ { code="gu", name="Gujarati", regions={ "IN" } },
+ { code="gv", name="Manx", regions={ "GB" } },
+ { code="ha", name="Hausa", regions={ "NG" } },
+ { code="hak", name="Hakka Chinese", regions={ "TW" } },
+ { code="he", name="Hebrew", regions={ "IL" } },
+ { code="hi", name="Hindi", regions={ "IN" } },
+ { code="hif", name="Fiji Hindi", regions={ "FJ" } },
+ { code="hne", name="Chhattisgarhi", regions={ "IN" } },
+ { code="hr", name="Croatian", regions={ "HR" } },
+ { code="hsb", name="Upper Sorbian", regions={ "DE" } },
+ { code="ht", name="Haitian Creole", regions={ "HT" } },
+ { code="hu", name="Hungarian", regions={ "HU" } },
+ { code="hy", name="Armenian", regions={ "AM" } },
+ { code="ia", name="Interlingua", regions={ "FR" } },
+ { code="id", name="Indonesian", regions={ "ID" } },
+ { code="ig", name="Igbo", regions={ "NG" } },
+ { code="ik", name="Inupiaq", regions={ "CA" } },
+ { code="is", name="Icelandic", regions={ "IS" } },
+ { code="it", name="Italian", regions={ "CH",
"IT" } },
+ { code="iu", name="Inuktitut", regions={ "CA" } },
+ { code="ja", name="Japanese", regions={ "JP" } },
+ { code="ka", name="Georgian", regions={ "GE" } },
+ { code="kab", name="Kabyle", regions={ "DZ" } },
+ { code="kk", name="Kazakh", regions={ "KZ" } },
+ { code="kl", name="Kalaallisut", regions={ "GL" } },
+ { code="km", name="Khmer", regions={ "KH" } },
+ { code="kn", name="Kannada", regions={ "IN" } },
+ { code="ko", name="Korean", regions={ "KR" } },
+ { code="kok", name="Konkani", regions={ "IN" } },
+ { code="ks", name="Kashmiri", regions={ "IN" } },
+ { code="ku", name="Kurdish", regions={ "TR" } },
+ { code="kw", name="Cornish", regions={ "GB" } },
+ { code="ky", name="Kyrgyz", regions={ "KG" } },
+ { code="lb", name="Luxembourgish", regions={ "LU" } },
+ { code="lg", name="Ganda", regions={ "UG" } },
+ { code="li", name="Limburgish", regions={ "BE",
"NL" } },
+ { code="lij", name="Ligurian", regions={ "IT" } },
+ { code="ln", name="Lingala", regions={ "CD" } },
+ { code="lo", name="Lao", regions={ "LA" } },
+ { code="lt", name="Lithuanian", regions={ "LT" } },
+ { code="lv", name="Latvian", regions={ "LV" } },
+ { code="lzh", name="Literary Chinese", regions={ "TW" }
},
+ { code="mag", name="Magahi", regions={ "IN" } },
+ { code="mai", name="Maithili", regions={ "IN",
"NP" } },
+ { code="mfe", name="Morisyen", regions={ "MU" } },
+ { code="mg", name="Malagasy", regions={ "MG" } },
+ { code="mhr", name="Meadow Mari", regions={ "RU" } },
+ { code="mi", name="Maori", regions={ "NZ" } },
+ { code="miq", name="Miskito", regions={ "NI" } },
+ { code="mjw", name="Karbi", regions={ "IN" } },
+ { code="mk", name="Macedonian", regions={ "MK" } },
+ { code="ml", name="Malayalam", regions={ "IN" } },
+ { code="mn", name="Mongolian", regions={ "MN" } },
+ { code="mni", name="Manipuri", regions={ "IN" } },
+ { code="mnw", name="Mon", regions={ "MM" } },
+ { code="mr", name="Marathi", regions={ "IN" } },
+ { code="ms", name="Malay", regions={ "MY" } },
+ { code="mt", name="Maltese", regions={ "MT" } },
+ { code="my", name="Burmese", regions={ "MM" } },
+ { code="nan", name="Min Nan Chinese", regions={ "TW" }
},
+ { code="nb", name="Norwegian Bokmål", regions={ "NO" }
},
+ { code="nds", name="Low German", regions={ "DE",
"NL" } },
+ { code="ne", name="Nepali", regions={ "NP" } },
+ { code="nhn", name="Tlaxcala-Puebla Nahuatl", regions={
"MX" } },
+ { code="niu", name="Niuean", regions={ "NU",
"NZ" } },
+ { code="nl", name="Dutch", regions={ "AW",
"BE", "NL" } },
+ { code="nn", name="Norwegian Nynorsk", regions={ "NO" }
},
+ { code="nr", name="South Ndebele", regions={ "ZA" } },
+ { code="nso", name="Northern Sotho", regions={ "ZA" }
},
+ { code="oc", name="Occitan", regions={ "FR" } },
+ { code="om", name="Oromo", regions={ "ET",
"KE" } },
+ { code="or", name="Odia", regions={ "IN" } },
+ { code="os", name="Ossetic", regions={ "RU" } },
+ { code="pa", name="Punjabi", regions={ "IN",
"PK" } },
+ { code="pap", name="Papiamento", regions={ "AW",
"CW" } },
+ { code="pl", name="Polish", regions={ "PL" } },
+ { code="ps", name="Pashto", regions={ "AF" } },
+ { code="pt", name="Portuguese", regions={ "BR",
"PT" } },
+ { code="quz", name="Cusco Quechua", regions={ "PE" } },
+ { code="raj", name="Rajasthani", regions={ "IN" } },
+ { code="ro", name="Romanian", regions={ "RO" } },
+ { code="ru", name="Russian", regions={ "RU",
"UA" } },
+ { code="rw", name="Kinyarwanda", regions={ "RW" } },
+ { code="sa", name="Sanskrit", regions={ "IN" } },
+ { code="sah", name="Sakha", regions={ "RU" } },
+ { code="sat", name="Santali", regions={ "IN" } },
+ { code="sc", name="Sardinian", regions={ "IT" } },
+ { code="sd", name="Sindhi", regions={ "IN" } },
+ { code="se", name="Northern Sami", regions={ "NO" } },
+ { code="sgs", name="Samogitian", regions={ "LT" } },
+ { code="shn", name="Shan", regions={ "MM" } },
+ { code="shs", name="Shuswap", regions={ "CA" } },
+ { code="si", name="Sinhala", regions={ "LK" } },
+ { code="sid", name="Sidamo", regions={ "ET" } },
+ { code="sk", name="Slovak", regions={ "SK" } },
+ { code="sl", name="Slovenian", regions={ "SI" } },
+ { code="sm", name="Samoan", regions={ "WS" } },
+ { code="so", name="Somali", regions={ "DJ",
"ET", "KE", "SO" } },
+ { code="sq", name="Albanian", regions={ "AL",
"MK" } },
+ { code="sr", name="Serbian", regions={ "ME",
"RS" } },
+ { code="ss", name="Swati", regions={ "ZA" } },
+ { code="st", name="Southern Sotho", regions={ "ZA" } },
+ { code="sv", name="Swedish", regions={ "FI",
"SE" } },
+ { code="sw", name="Swahili", regions={ "KE",
"TZ" } },
+ { code="szl", name="Silesian", regions={ "PL" } },
+ { code="ta", name="Tamil", regions={ "IN",
"LK" } },
+ { code="tcy", name="Tulu", regions={ "IN" } },
+ { code="te", name="Telugu", regions={ "IN" } },
+ { code="tg", name="Tajik", regions={ "TJ" } },
+ { code="th", name="Thai", regions={ "TH" } },
+ { code="the", name="Chitwania Tharu", regions={ "NP" }
},
+ { code="ti", name="Tigrinya", regions={ "ER",
"ET" } },
+ { code="tig", name="Tigre", regions={ "ER" } },
+ { code="tk", name="Turkmen", regions={ "TM" } },
+ { code="tl", name="Tagalog", regions={ "PH" } },
+ { code="tn", name="Tswana", regions={ "ZA" } },
+ { code="to", name="Tongan", regions={ "TO" } },
+ { code="tpi", name="Tok Pisin", regions={ "PG" } },
+ { code="tr", name="Turkish", regions={ "CY",
"TR" } },
+ { code="ts", name="Tsonga", regions={ "ZA" } },
+ { code="tt", name="Tatar", regions={ "RU" } },
+ { code="ug", name="Uyghur", regions={ "CN" } },
+ { code="uk", name="Ukrainian", regions={ "UA" } },
+ { code="unm", name="Unami language", regions={ "US" }
},
+ { code="ur", name="Urdu", regions={ "IN", "PK"
} },
+ { code="uz", name="Uzbek", regions={ "UZ" } },
+ { code="ve", name="Venda", regions={ "ZA" } },
+ { code="vi", name="Vietnamese", regions={ "VN" } },
+ { code="wa", name="Walloon", regions={ "BE" } },
+ { code="wae", name="Walser", regions={ "CH" } },
+ { code="wal", name="Wolaytta", regions={ "ET" } },
+ { code="wo", name="Wolof", regions={ "SN" } },
+ { code="xh", name="Xhosa", regions={ "ZA" } },
+ { code="yi", name="Yiddish", regions={ "US" } },
+ { code="yo", name="Yoruba", regions={ "NG" } },
+ { code="yue", name="Cantonese", regions={ "HK" } },
+ { code="yuw", name="Yau", regions={ "PG" } },
+ { code="zh", name="Mandarin Chinese", regions={ "CN",
"HK", "SG", "TW" } },
+ { code="zu", name="Zulu", regions={ "ZA" } }
+}
OK.
+
+-- Prints a list of LANGUAGE "_" REGION pairs. The output is expected
+-- to be identical to parse-SUPPORTED.py. Called from the %%prep section.
+function print_locale_pairs()
+ for i = 1, #locales do
+ local locale = locales[i]
+ if #locale.regions == 0 then
+ print(locale.code .. "\n")
+ else
+ for j = 1, #locale.regions do
+ print(locale.code .. "_" .. locale.regions[j] .. "\n")
OK.
end
end
end
- -- Sort for determinism.
- table.sort(languages)
- for _, supples in pairs(supplements) do
- table.sort(supplements)
- end
OK. We stop sorting the languages alphabetically, but instead now rely
on the table of data being sorted to *match* the output of parse-SUPPORTED.py
which includes sorting glibc's SUPPORTED. Therefore if the table gets out
of order we'll see a natural failure in %prep.
end
--- Compute the language names
-local langnames = {}
-local python3 = io.open('/usr/bin/python3', 'r')
-if python3 then
- python3:close()
- local args = table.concat(languages, ' ')
- local file = io.popen(rpm.expand("%{SOURCE13}") .. ' ' .. args)
- while true do
- line = file:read()
- if line == nil then break end
- langnames[#langnames + 1] = line
- end
- file:close()
-else
- for i = 1, #languages do
- langnames[#langnames + 1] = languages[i]
- end
-end
-
OK.
--- Compute the Supplements: list for a language, based on the
regions.
-local function compute_supplements(lang)
+local function compute_supplements(locale)
+ local lang = locale.code
+ local regions = locale.regions
result = "langpacks-core-" .. lang
- regions = supplements[lang]
- if regions ~= nil then
- for i = 1, #regions do
- result = result .. " or langpacks-core-" .. lang .. "_" ..
regions[i]
- end
+ for i = 1, #regions do
+ result = result .. " or langpacks-core-" .. lang .. "_" ..
regions[i]
OK.
end
return result
end
-- Emit the definition of a language pack package.
-local function lang_package(lang, langname)
- local suppl = compute_supplements(lang)
+local function lang_package(locale)
+ local lang = locale.code
+ local langname = locale.name
+ local suppl = compute_supplements(locale)
OK.
print(rpm.expand([[
%package langpack-]]..lang..[[
@@ -528,8 +763,8 @@ to support the ]]..langname..[[ language in your applications.
]]))
end
-for i = 1, #languages do
- lang_package(languages[i], langnames[i])
+for i = 1, #locales do
+ lang_package(locales[i])
OK.
end
}
@@ -748,17 +983,16 @@ touch `find . -name configure`
# Ensure *-kw.h files are current to prevent regenerating them.
touch locale/programs/*-kw.h
-# Verify that our copy of localedata/SUPPORTED matches the glibc
-# version.
-#
-# The separate file copy is used by the Lua parser above.
-# Patches or new upstream versions may change the list of locales,
-# which changes the set of langpacks we need to build. Verify the
-# differences then update the copy of SUPPORTED. This approach has
-# two purposes: (a) avoid spurious changes to the set of langpacks,
-# and (b) the Lua snippet can use a fully patched-up version
-# of the localedata/SUPPORTED file.
-diff -u %{SOURCE11} localedata/SUPPORTED
OK.
+# Verify that our locales table is compatible with the locales
table
+# in the spec file.
+set +x
+echo '%{lua: print_locale_pairs()}' > localedata/SUPPORTED.spec
+set -x
+python3 %{SOURCE11} localedata/SUPPORTED > localedata/SUPPORTED.glibc
+diff -u \
+ --label "spec file" localedata/SUPPORTED.spec \
+ --label "glibc localedata/SUPPORTED" localedata/SUPPORTED.glibc
OK. If we don't sort the data table in the spec file alphabetically then
it would fail during diff because it wouldn't match glibc.
+rm localedata/SUPPORTED.spec localedata/SUPPORTED.glibc
OK. Nice setup.
##############################################################################
# Build glibc...
diff --git a/parse-SUPPORTED.py b/parse-SUPPORTED.py
new file mode 100644
index 0000000..cf512de
--- /dev/null
+++ b/parse-SUPPORTED.py
@@ -0,0 +1,40 @@
+#!/usr/bin/python3
+#
+# This script turns localedata/SUPPORTED (whose path is passed as the
+# first argument) into a normalized list of LANGUAGE "_" REGION pairs.
+# (If there is no REGION defined, only LANGUAGE is used.) The list
+# is written to standard output, with one element per line.
+
+import sys
+
+supported, = sys.argv[1:]
+
+# Pairs seen so far. Used to suppress duplicates.
+seen = set()
+with open(supported) as inp:
+ for line in inp:
OK. Walk lines in order.
+ if line.startswith("#") or line ==
"SUPPORTED-LOCALES=\\\n":
+ # Comment or prefix.
+ continue
OK. Ignore comments and SUPPORTED-LOCALES start assignment.
+ if not line.endswith(" \\\n"):
+ raise IOError("line without continuation: " + repr(line))
+ try:
+ slash = line.index("/")
+ except ValueError:
+ raise IOError("line without slash: " + repr(line))
+ spec = line[:slash]
+ for separator in ".@":
+ try:
+ # Strip charset, variant specifiers.
+ spec = spec[:spec.index(separator)]
+ except ValueError:
+ pass
+ seen.add(spec)
+
+# The C locale does not correspond to a language.
+seen.remove("C")
+
+# The glibc source file is not sorted.
+for spec in sorted(seen):
+ print(spec)
OK. Sort the SUPPORTED data.
+print() # The Lua generator produces a trailing newline.
--
Cheers,
Carlos.