[PATCH] Make glibc.spec self-contained for parsing

Monday, 12 October 2020

Store the locales list as a Lua table in the spec file.  Add Lua
code and a new Python script, parse-SUPPORTED.py, to compute
a common representation from it.

This is in response to this bug:

  <https://bugzilla.redhat.com/show_bug.cgi?id=1887097>

---
 SUPPORTED          | 496 -----------------------------------------------------
 convnames.py       |  18 --
 glibc.spec         | 418 ++++++++++++++++++++++++++++++++++----------
 parse-SUPPORTED.py |  40 +++++
 4 files changed, 366 insertions(+), 606 deletions(-)

diff --git a/SUPPORTED b/SUPPORTED
deleted file mode 100644
index cd785d7..0000000
--- a/SUPPORTED
+++ /dev/null
@@ -1,496 +0,0 @@
-# This file names the currently supported and somewhat tested locales.
-# If you have any additions please file a glibc bug report.
-SUPPORTED-LOCALES=\
-C.UTF-8/UTF-8 \
-aa_DJ.UTF-8/UTF-8 \
-aa_DJ/ISO-8859-1 \
-aa_ER/UTF-8 \
-aa_ER@saaho/UTF-8 \
-aa_ET/UTF-8 \
-af_ZA.UTF-8/UTF-8 \
-af_ZA/ISO-8859-1 \
-agr_PE/UTF-8 \
-ak_GH/UTF-8 \
-am_ET/UTF-8 \
-an_ES.UTF-8/UTF-8 \
-an_ES/ISO-8859-15 \
-anp_IN/UTF-8 \
-ar_AE.UTF-8/UTF-8 \
-ar_AE/ISO-8859-6 \
-ar_BH.UTF-8/UTF-8 \
-ar_BH/ISO-8859-6 \
-ar_DZ.UTF-8/UTF-8 \
-ar_DZ/ISO-8859-6 \
-ar_EG.UTF-8/UTF-8 \
-ar_EG/ISO-8859-6 \
-ar_IN/UTF-8 \
-ar_IQ.UTF-8/UTF-8 \
-ar_IQ/ISO-8859-6 \
-ar_JO.UTF-8/UTF-8 \
-ar_JO/ISO-8859-6 \
-ar_KW.UTF-8/UTF-8 \
-ar_KW/ISO-8859-6 \
-ar_LB.UTF-8/UTF-8 \
-ar_LB/ISO-8859-6 \
-ar_LY.UTF-8/UTF-8 \
-ar_LY/ISO-8859-6 \
-ar_MA.UTF-8/UTF-8 \
-ar_MA/ISO-8859-6 \
-ar_OM.UTF-8/UTF-8 \
-ar_OM/ISO-8859-6 \
-ar_QA.UTF-8/UTF-8 \
-ar_QA/ISO-8859-6 \
-ar_SA.UTF-8/UTF-8 \
-ar_SA/ISO-8859-6 \
-ar_SD.UTF-8/UTF-8 \
-ar_SD/ISO-8859-6 \
-ar_SS/UTF-8 \
-ar_SY.UTF-8/UTF-8 \
-ar_SY/ISO-8859-6 \
-ar_TN.UTF-8/UTF-8 \
-ar_TN/ISO-8859-6 \
-ar_YE.UTF-8/UTF-8 \
-ar_YE/ISO-8859-6 \
-ayc_PE/UTF-8 \
-az_AZ/UTF-8 \
-az_IR/UTF-8 \
-as_IN/UTF-8 \
-ast_ES.UTF-8/UTF-8 \
-ast_ES/ISO-8859-15 \
-be_BY.UTF-8/UTF-8 \
-be_BY/CP1251 \
-be_BY@latin/UTF-8 \
-bem_ZM/UTF-8 \
-ber_DZ/UTF-8 \
-ber_MA/UTF-8 \
-bg_BG.UTF-8/UTF-8 \
-bg_BG/CP1251 \
-bhb_IN.UTF-8/UTF-8 \
-bho_IN/UTF-8 \
-bho_NP/UTF-8 \
-bi_VU/UTF-8 \
-bn_BD/UTF-8 \
-bn_IN/UTF-8 \
-bo_CN/UTF-8 \
-bo_IN/UTF-8 \
-br_FR.UTF-8/UTF-8 \
-br_FR/ISO-8859-1 \
-br_FR@euro/ISO-8859-15 \
-brx_IN/UTF-8 \
-bs_BA.UTF-8/UTF-8 \
-bs_BA/ISO-8859-2 \
-byn_ER/UTF-8 \
-ca_AD.UTF-8/UTF-8 \
-ca_AD/ISO-8859-15 \
-ca_ES.UTF-8/UTF-8 \
-ca_ES/ISO-8859-1 \
-ca_ES@euro/ISO-8859-15 \
-ca_ES@valencia/UTF-8 \
-ca_FR.UTF-8/UTF-8 \
-ca_FR/ISO-8859-15 \
-ca_IT.UTF-8/UTF-8 \
-ca_IT/ISO-8859-15 \
-ce_RU/UTF-8 \
-chr_US/UTF-8 \
-ckb_IQ/UTF-8 \
-cmn_TW/UTF-8 \
-crh_UA/UTF-8 \
-cs_CZ.UTF-8/UTF-8 \
-cs_CZ/ISO-8859-2 \
-csb_PL/UTF-8 \
-cv_RU/UTF-8 \
-cy_GB.UTF-8/UTF-8 \
-cy_GB/ISO-8859-14 \
-da_DK.UTF-8/UTF-8 \
-da_DK/ISO-8859-1 \
-da_DK.ISO-8859-15/ISO-8859-15 \
-de_AT.UTF-8/UTF-8 \
-de_AT/ISO-8859-1 \
-de_AT@euro/ISO-8859-15 \
-de_BE.UTF-8/UTF-8 \
-de_BE/ISO-8859-1 \
-de_BE@euro/ISO-8859-15 \
-de_CH.UTF-8/UTF-8 \
-de_CH/ISO-8859-1 \
-de_DE.UTF-8/UTF-8 \
-de_DE/ISO-8859-1 \
-de_DE@euro/ISO-8859-15 \
-de_IT.UTF-8/UTF-8 \
-de_IT/ISO-8859-1 \
-de_LI.UTF-8/UTF-8 \
-de_LU.UTF-8/UTF-8 \
-de_LU/ISO-8859-1 \
-de_LU@euro/ISO-8859-15 \
-doi_IN/UTF-8 \
-dsb_DE/UTF-8 \
-dv_MV/UTF-8 \
-dz_BT/UTF-8 \
-el_GR.UTF-8/UTF-8 \
-el_GR/ISO-8859-7 \
-el_GR@euro/ISO-8859-7 \
-el_CY.UTF-8/UTF-8 \
-el_CY/ISO-8859-7 \
-en_AG/UTF-8 \
-en_AU.UTF-8/UTF-8 \
-en_AU/ISO-8859-1 \
-en_BW.UTF-8/UTF-8 \
-en_BW/ISO-8859-1 \
-en_CA.UTF-8/UTF-8 \
-en_CA/ISO-8859-1 \
-en_DK.UTF-8/UTF-8 \
-en_DK/ISO-8859-1 \
-en_GB.UTF-8/UTF-8 \
-en_GB/ISO-8859-1 \
-en_GB.ISO-8859-15/ISO-8859-15 \
-en_HK.UTF-8/UTF-8 \
-en_HK/ISO-8859-1 \
-en_IE.UTF-8/UTF-8 \
-en_IE/ISO-8859-1 \
-en_IE@euro/ISO-8859-15 \
-en_IL/UTF-8 \
-en_IN/UTF-8 \
-en_NG/UTF-8 \
-en_NZ.UTF-8/UTF-8 \
-en_NZ/ISO-8859-1 \
-en_PH.UTF-8/UTF-8 \
-en_PH/ISO-8859-1 \
-en_SC.UTF-8/UTF-8 \
-en_SG.UTF-8/UTF-8 \
-en_SG/ISO-8859-1 \
-en_US.UTF-8/UTF-8 \
-en_US/ISO-8859-1 \
-en_US.ISO-8859-15/ISO-8859-15 \
-en_ZA.UTF-8/UTF-8 \
-en_ZA/ISO-8859-1 \
-en_ZM/UTF-8 \
-en_ZW.UTF-8/UTF-8 \
-en_ZW/ISO-8859-1 \
-eo/UTF-8 \
-es_AR.UTF-8/UTF-8 \
-es_AR/ISO-8859-1 \
-es_BO.UTF-8/UTF-8 \
-es_BO/ISO-8859-1 \
-es_CL.UTF-8/UTF-8 \
-es_CL/ISO-8859-1 \
-es_CO.UTF-8/UTF-8 \
-es_CO/ISO-8859-1 \
-es_CR.UTF-8/UTF-8 \
-es_CR/ISO-8859-1 \
-es_CU/UTF-8 \
-es_DO.UTF-8/UTF-8 \
-es_DO/ISO-8859-1 \
-es_EC.UTF-8/UTF-8 \
-es_EC/ISO-8859-1 \
-es_ES.UTF-8/UTF-8 \
-es_ES/ISO-8859-1 \
-es_ES@euro/ISO-8859-15 \
-es_GT.UTF-8/UTF-8 \
-es_GT/ISO-8859-1 \
-es_HN.UTF-8/UTF-8 \
-es_HN/ISO-8859-1 \
-es_MX.UTF-8/UTF-8 \
-es_MX/ISO-8859-1 \
-es_NI.UTF-8/UTF-8 \
-es_NI/ISO-8859-1 \
-es_PA.UTF-8/UTF-8 \
-es_PA/ISO-8859-1 \
-es_PE.UTF-8/UTF-8 \
-es_PE/ISO-8859-1 \
-es_PR.UTF-8/UTF-8 \
-es_PR/ISO-8859-1 \
-es_PY.UTF-8/UTF-8 \
-es_PY/ISO-8859-1 \
-es_SV.UTF-8/UTF-8 \
-es_SV/ISO-8859-1 \
-es_US.UTF-8/UTF-8 \
-es_US/ISO-8859-1 \
-es_UY.UTF-8/UTF-8 \
-es_UY/ISO-8859-1 \
-es_VE.UTF-8/UTF-8 \
-es_VE/ISO-8859-1 \
-et_EE.UTF-8/UTF-8 \
-et_EE/ISO-8859-1 \
-et_EE.ISO-8859-15/ISO-8859-15 \
-eu_ES.UTF-8/UTF-8 \
-eu_ES/ISO-8859-1 \
-eu_ES@euro/ISO-8859-15 \
-fa_IR/UTF-8 \
-ff_SN/UTF-8 \
-fi_FI.UTF-8/UTF-8 \
-fi_FI/ISO-8859-1 \
-fi_FI@euro/ISO-8859-15 \
-fil_PH/UTF-8 \
-fo_FO.UTF-8/UTF-8 \
-fo_FO/ISO-8859-1 \
-fr_BE.UTF-8/UTF-8 \
-fr_BE/ISO-8859-1 \
-fr_BE@euro/ISO-8859-15 \
-fr_CA.UTF-8/UTF-8 \
-fr_CA/ISO-8859-1 \
-fr_CH.UTF-8/UTF-8 \
-fr_CH/ISO-8859-1 \
-fr_FR.UTF-8/UTF-8 \
-fr_FR/ISO-8859-1 \
-fr_FR@euro/ISO-8859-15 \
-fr_LU.UTF-8/UTF-8 \
-fr_LU/ISO-8859-1 \
-fr_LU@euro/ISO-8859-15 \
-fur_IT/UTF-8 \
-fy_NL/UTF-8 \
-fy_DE/UTF-8 \
-ga_IE.UTF-8/UTF-8 \
-ga_IE/ISO-8859-1 \
-ga_IE@euro/ISO-8859-15 \
-gd_GB.UTF-8/UTF-8 \
-gd_GB/ISO-8859-15 \
-gez_ER/UTF-8 \
-gez_ER@abegede/UTF-8 \
-gez_ET/UTF-8 \
-gez_ET@abegede/UTF-8 \
-gl_ES.UTF-8/UTF-8 \
-gl_ES/ISO-8859-1 \
-gl_ES@euro/ISO-8859-15 \
-gu_IN/UTF-8 \
-gv_GB.UTF-8/UTF-8 \
-gv_GB/ISO-8859-1 \
-ha_NG/UTF-8 \
-hak_TW/UTF-8 \
-he_IL.UTF-8/UTF-8 \
-he_IL/ISO-8859-8 \
-hi_IN/UTF-8 \
-hif_FJ/UTF-8 \
-hne_IN/UTF-8 \
-hr_HR.UTF-8/UTF-8 \
-hr_HR/ISO-8859-2 \
-hsb_DE/ISO-8859-2 \
-hsb_DE.UTF-8/UTF-8 \
-ht_HT/UTF-8 \
-hu_HU.UTF-8/UTF-8 \
-hu_HU/ISO-8859-2 \
-hy_AM/UTF-8 \
-hy_AM.ARMSCII-8/ARMSCII-8 \
-ia_FR/UTF-8 \
-id_ID.UTF-8/UTF-8 \
-id_ID/ISO-8859-1 \
-ig_NG/UTF-8 \
-ik_CA/UTF-8 \
-is_IS.UTF-8/UTF-8 \
-is_IS/ISO-8859-1 \
-it_CH.UTF-8/UTF-8 \
-it_CH/ISO-8859-1 \
-it_IT.UTF-8/UTF-8 \
-it_IT/ISO-8859-1 \
-it_IT@euro/ISO-8859-15 \
-iu_CA/UTF-8 \
-ja_JP.EUC-JP/EUC-JP \
-ja_JP.UTF-8/UTF-8 \
-ka_GE.UTF-8/UTF-8 \
-ka_GE/GEORGIAN-PS \
-kab_DZ/UTF-8 \
-kk_KZ.UTF-8/UTF-8 \
-kk_KZ/PT154 \
-kl_GL.UTF-8/UTF-8 \
-kl_GL/ISO-8859-1 \
-km_KH/UTF-8 \
-kn_IN/UTF-8 \
-ko_KR.EUC-KR/EUC-KR \
-ko_KR.UTF-8/UTF-8 \
-kok_IN/UTF-8 \
-ks_IN/UTF-8 \
-ks_IN@devanagari/UTF-8 \
-ku_TR.UTF-8/UTF-8 \
-ku_TR/ISO-8859-9 \
-kw_GB.UTF-8/UTF-8 \
-kw_GB/ISO-8859-1 \
-ky_KG/UTF-8 \
-lb_LU/UTF-8 \
-lg_UG.UTF-8/UTF-8 \
-lg_UG/ISO-8859-10 \
-li_BE/UTF-8 \
-li_NL/UTF-8 \
-lij_IT/UTF-8 \
-ln_CD/UTF-8 \
-lo_LA/UTF-8 \
-lt_LT.UTF-8/UTF-8 \
-lt_LT/ISO-8859-13 \
-lv_LV.UTF-8/UTF-8 \
-lv_LV/ISO-8859-13 \
-lzh_TW/UTF-8 \
-mag_IN/UTF-8 \
-mai_IN/UTF-8 \
-mai_NP/UTF-8 \
-mfe_MU/UTF-8 \
-mg_MG.UTF-8/UTF-8 \
-mg_MG/ISO-8859-15 \
-mhr_RU/UTF-8 \
-mi_NZ.UTF-8/UTF-8 \
-mi_NZ/ISO-8859-13 \
-miq_NI/UTF-8 \
-mjw_IN/UTF-8 \
-mk_MK.UTF-8/UTF-8 \
-mk_MK/ISO-8859-5 \
-ml_IN/UTF-8 \
-mn_MN/UTF-8 \
-mni_IN/UTF-8 \
-mnw_MM/UTF-8 \
-mr_IN/UTF-8 \
-ms_MY.UTF-8/UTF-8 \
-ms_MY/ISO-8859-1 \
-mt_MT.UTF-8/UTF-8 \
-mt_MT/ISO-8859-3 \
-my_MM/UTF-8 \
-nan_TW/UTF-8 \
-nan_TW@latin/UTF-8 \
-nb_NO.UTF-8/UTF-8 \
-nb_NO/ISO-8859-1 \
-nds_DE/UTF-8 \
-nds_NL/UTF-8 \
-ne_NP/UTF-8 \
-nhn_MX/UTF-8 \
-niu_NU/UTF-8 \
-niu_NZ/UTF-8 \
-nl_AW/UTF-8 \
-nl_BE.UTF-8/UTF-8 \
-nl_BE/ISO-8859-1 \
-nl_BE@euro/ISO-8859-15 \
-nl_NL.UTF-8/UTF-8 \
-nl_NL/ISO-8859-1 \
-nl_NL@euro/ISO-8859-15 \
-nn_NO.UTF-8/UTF-8 \
-nn_NO/ISO-8859-1 \
-nr_ZA/UTF-8 \
-nso_ZA/UTF-8 \
-oc_FR.UTF-8/UTF-8 \
-oc_FR/ISO-8859-1 \
-om_ET/UTF-8 \
-om_KE.UTF-8/UTF-8 \
-om_KE/ISO-8859-1 \
-or_IN/UTF-8 \
-os_RU/UTF-8 \
-pa_IN/UTF-8 \
-pa_PK/UTF-8 \
-pap_AW/UTF-8 \
-pap_CW/UTF-8 \
-pl_PL.UTF-8/UTF-8 \
-pl_PL/ISO-8859-2 \
-ps_AF/UTF-8 \
-pt_BR.UTF-8/UTF-8 \
-pt_BR/ISO-8859-1 \
-pt_PT.UTF-8/UTF-8 \
-pt_PT/ISO-8859-1 \
-pt_PT@euro/ISO-8859-15 \
-quz_PE/UTF-8 \
-raj_IN/UTF-8 \
-ro_RO.UTF-8/UTF-8 \
-ro_RO/ISO-8859-2 \
-ru_RU.KOI8-R/KOI8-R \
-ru_RU.UTF-8/UTF-8 \
-ru_RU/ISO-8859-5 \
-ru_UA.UTF-8/UTF-8 \
-ru_UA/KOI8-U \
-rw_RW/UTF-8 \
-sa_IN/UTF-8 \
-sah_RU/UTF-8 \
-sat_IN/UTF-8 \
-sc_IT/UTF-8 \
-sd_IN/UTF-8 \
-sd_IN@devanagari/UTF-8 \
-se_NO/UTF-8 \
-sgs_LT/UTF-8 \
-shn_MM/UTF-8 \
-shs_CA/UTF-8 \
-si_LK/UTF-8 \
-sid_ET/UTF-8 \
-sk_SK.UTF-8/UTF-8 \
-sk_SK/ISO-8859-2 \
-sl_SI.UTF-8/UTF-8 \
-sl_SI/ISO-8859-2 \
-sm_WS/UTF-8 \
-so_DJ.UTF-8/UTF-8 \
-so_DJ/ISO-8859-1 \
-so_ET/UTF-8 \
-so_KE.UTF-8/UTF-8 \
-so_KE/ISO-8859-1 \
-so_SO.UTF-8/UTF-8 \
-so_SO/ISO-8859-1 \
-sq_AL.UTF-8/UTF-8 \
-sq_AL/ISO-8859-1 \
-sq_MK/UTF-8 \
-sr_ME/UTF-8 \
-sr_RS/UTF-8 \
-sr_RS@latin/UTF-8 \
-ss_ZA/UTF-8 \
-st_ZA.UTF-8/UTF-8 \
-st_ZA/ISO-8859-1 \
-sv_FI.UTF-8/UTF-8 \
-sv_FI/ISO-8859-1 \
-sv_FI@euro/ISO-8859-15 \
-sv_SE.UTF-8/UTF-8 \
-sv_SE/ISO-8859-1 \
-sv_SE.ISO-8859-15/ISO-8859-15 \
-sw_KE/UTF-8 \
-sw_TZ/UTF-8 \
-szl_PL/UTF-8 \
-ta_IN/UTF-8 \
-ta_LK/UTF-8 \
-tcy_IN.UTF-8/UTF-8 \
-te_IN/UTF-8 \
-tg_TJ.UTF-8/UTF-8 \
-tg_TJ/KOI8-T \
-th_TH.UTF-8/UTF-8 \
-th_TH/TIS-620 \
-the_NP/UTF-8 \
-ti_ER/UTF-8 \
-ti_ET/UTF-8 \
-tig_ER/UTF-8 \
-tk_TM/UTF-8 \
-tl_PH.UTF-8/UTF-8 \
-tl_PH/ISO-8859-1 \
-tn_ZA/UTF-8 \
-to_TO/UTF-8 \
-tpi_PG/UTF-8 \
-tr_CY.UTF-8/UTF-8 \
-tr_CY/ISO-8859-9 \
-tr_TR.UTF-8/UTF-8 \
-tr_TR/ISO-8859-9 \
-ts_ZA/UTF-8 \
-tt_RU/UTF-8 \
-tt_RU@iqtelif/UTF-8 \
-ug_CN/UTF-8 \
-uk_UA.UTF-8/UTF-8 \
-uk_UA/KOI8-U \
-unm_US/UTF-8 \
-ur_IN/UTF-8 \
-ur_PK/UTF-8 \
-uz_UZ.UTF-8/UTF-8 \
-uz_UZ/ISO-8859-1 \
-uz_UZ@cyrillic/UTF-8 \
-ve_ZA/UTF-8 \
-vi_VN/UTF-8 \
-wa_BE/ISO-8859-1 \
-wa_BE@euro/ISO-8859-15 \
-wa_BE.UTF-8/UTF-8 \
-wae_CH/UTF-8 \
-wal_ET/UTF-8 \
-wo_SN/UTF-8 \
-xh_ZA.UTF-8/UTF-8 \
-xh_ZA/ISO-8859-1 \
-yi_US.UTF-8/UTF-8 \
-yi_US/CP1255 \
-yo_NG/UTF-8 \
-yue_HK/UTF-8 \
-yuw_PG/UTF-8 \
-zh_CN.GB18030/GB18030 \
-zh_CN.GBK/GBK \
-zh_CN.UTF-8/UTF-8 \
-zh_CN/GB2312 \
-zh_HK.UTF-8/UTF-8 \
-zh_HK/BIG5-HKSCS \
-zh_SG.UTF-8/UTF-8 \
-zh_SG.GBK/GBK \
-zh_SG/GB2312 \
-zh_TW.EUC-TW/EUC-TW \
-zh_TW.UTF-8/UTF-8 \
-zh_TW/BIG5 \
-zu_ZA.UTF-8/UTF-8 \
-zu_ZA/ISO-8859-1 \
diff --git a/convnames.py b/convnames.py
deleted file mode 100755
index 4a93041..0000000
--- a/convnames.py
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/usr/bin/python3
-# This code is called by glibc.spec via lua to generate the mapping
-# from language code to language name.  The code uses langtable to
-# do the mapping.  The information in langtable is a harmonization
-# of CLDR and glibc lang_name data.
-import sys
-try:
-    import langtable
-except ImportError:
-    # if the import fails, don't translate anything
-    langtable = None
-
-for lang in sys.argv[1:]:
-    if langtable:
-        name = langtable.language_name(languageId=lang, languageIdQuery='en')
-        print(name or lang)
-    else:
-        print(lang)
diff --git a/glibc.spec b/glibc.spec
index 67a85e2..c622b88 100644
--- a/glibc.spec
+++ b/glibc.spec
@@ -130,20 +130,9 @@ Source0: %{?glibc_release_url}%{glibcsrcdir}.tar.xz
 Source1: nscd.conf
 Source2: bench.mk
 Source3: glibc-bench-compare
-# A copy of localedata/SUPPORTED in the Source0 tarball.  The
-# SUPPORTED file is used below to generate the list of locale
-# packages, using a Lua snippet.
-# When the upstream SUPPORTED is out of sync with our copy, the
-# prep phase will fail and you will need to update the local
-# copy.
-Source11: SUPPORTED
+Source11: parse-SUPPORTED.py
 # Include in the source RPM for reference.
 Source12: ChangeLog.old
-# Provide ISO language code to name translation using Python's
-# langtable. The langtable data is maintained by the Fedora
-# i18n team and is a harmonization of CLDR and glibc lang_name
-# data in a more accessible API (also used by Anaconda).
-Source13: convnames.py
 
 ##############################################################################
 # Patches:
@@ -238,7 +227,6 @@ BuildRequires: systemd
 # distributions, python3 does not actually install /usr/bin/python3,
 # so we also depend on python3-devel.
 BuildRequires: python3 python3-devel
-BuildRequires: python3dist(langtable)
 
 # This GCC version is needed for -fstack-clash-protection support.
 BuildRequires: gcc >= 7.2.1-6
@@ -432,84 +420,331 @@ If you are building custom locales you will most likely use
 these sources as the basis for your new locale.
 
 %{lua:
--- Array of languages (ISO-639 codes).
-local languages = {}
--- Dictionary from language codes (as in the languages array) to arrays
--- of regions.
-local supplements = {}
-do
-   -- Parse the SUPPORTED file.  Eliminate duplicates.
-   local lang_region_seen = {}
-   for line in io.lines(rpm.expand("%{SOURCE11}")) do
-      -- Match lines which contain a language (eo) or language/region
-      -- (en_US) strings.
-      local lang_region = string.match(line, "^([a-z][^/@.]+)")
-      if lang_region ~= nil then
-	 if lang_region_seen[lang_region] == nil then
-	    lang_region_seen[lang_region] = true
+-- To make lua-mode happy: '
 
-	    -- Split language/region pair.
-	    local lang, region = string.match(lang_region, "^(.+)_(.+)")
-	    if lang == nil then
-	       -- Region is missing, use only the language.
-	       lang = lang_region
-	    end
-	    local suppl = supplements[lang]
-	    if suppl == nil then
-	       suppl = {}
-	       supplements[lang] = suppl
-	       -- New language not seen before.
-	       languages[#languages + 1] = lang
-	    end
-	    if region ~= nil then
-	       -- New region because of the check against
-	       -- lang_region_seen above.
-	       suppl[#suppl + 1] = region
-	    end
+-- List of supported locales.  This is used to generate the langpack
+-- subpackages below.  This table needs adjustments if the set of
+-- glibc locales changes.  "code" is the glibc code for the language
+-- (before the "_").  "name" is the English translation of the language
+-- name (for use in subpackage descriptions).  "regions" is a table of
+-- variant specifiers (after the "_", excluding "@" and "."
+-- variants/charset specifiers).  The table must be sorted by the code
+-- field, and the regions table must be sorted as well.
+--
+-- English translations of language names can be obtained using (for
+-- the "aa" language in this example):
+--
+-- python3 -c 'import langtable; print(langtable.language_name("aa", languageIdQuery="en"))'
+
+local locales =  {
+  { code="aa", name="Afar", regions={ "DJ", "ER",
"ET" } },
+  { code="af", name="Afrikaans", regions={ "ZA" } },
+  { code="agr", name="Aguaruna", regions={ "PE" } },
+  { code="ak", name="Akan", regions={ "GH" } },
+  { code="am", name="Amharic", regions={ "ET" } },
+  { code="an", name="Aragonese", regions={ "ES" } },
+  { code="anp", name="Angika", regions={ "IN" } },
+  {
+    code="ar",
+    name="Arabic",
+    regions={
+      "AE",
+      "BH",
+      "DZ",
+      "EG",
+      "IN",
+      "IQ",
+      "JO",
+      "KW",
+      "LB",
+      "LY",
+      "MA",
+      "OM",
+      "QA",
+      "SA",
+      "SD",
+      "SS",
+      "SY",
+      "TN",
+      "YE" 
+    } 
+  },
+  { code="as", name="Assamese", regions={ "IN" } },
+  { code="ast", name="Asturian", regions={ "ES" } },
+  { code="ayc", name="Southern Aymara", regions={ "PE" }
},
+  { code="az", name="Azerbaijani", regions={ "AZ",
"IR" } },
+  { code="be", name="Belarusian", regions={ "BY" } },
+  { code="bem", name="Bemba", regions={ "ZM" } },
+  { code="ber", name="Berber", regions={ "DZ",
"MA" } },
+  { code="bg", name="Bulgarian", regions={ "BG" } },
+  { code="bhb", name="Bhili", regions={ "IN" } },
+  { code="bho", name="Bhojpuri", regions={ "IN",
"NP" } },
+  { code="bi", name="Bislama", regions={ "VU" } },
+  { code="bn", name="Bangla", regions={ "BD",
"IN" } },
+  { code="bo", name="Tibetan", regions={ "CN",
"IN" } },
+  { code="br", name="Breton", regions={ "FR" } },
+  { code="brx", name="Bodo", regions={ "IN" } },
+  { code="bs", name="Bosnian", regions={ "BA" } },
+  { code="byn", name="Blin", regions={ "ER" } },
+  { code="ca", name="Catalan", regions={ "AD",
"ES", "FR", "IT" } },
+  { code="ce", name="Chechen", regions={ "RU" } },
+  { code="chr", name="Cherokee", regions={ "US" } },
+  { code="ckb", name="Central Kurdish", regions={ "IQ" }
},
+  { code="cmn", name="Mandarin Chinese", regions={ "TW" }
},
+  { code="crh", name="Crimean Turkish", regions={ "UA" }
},
+  { code="cs", name="Czech", regions={ "CZ" } },
+  { code="csb", name="Kashubian", regions={ "PL" } },
+  { code="cv", name="Chuvash", regions={ "RU" } },
+  { code="cy", name="Welsh", regions={ "GB" } },
+  { code="da", name="Danish", regions={ "DK" } },
+  {
+    code="de",
+    name="German",
+    regions={ "AT", "BE", "CH", "DE",
"IT", "LI", "LU" } 
+  },
+  { code="doi", name="Dogri", regions={ "IN" } },
+  { code="dsb", name="Lower Sorbian", regions={ "DE" } },
+  { code="dv", name="Divehi", regions={ "MV" } },
+  { code="dz", name="Dzongkha", regions={ "BT" } },
+  { code="el", name="Greek", regions={ "CY", "GR"
} },
+  {
+    code="en",
+    name="English",
+    regions={
+      "AG",
+      "AU",
+      "BW",
+      "CA",
+      "DK",
+      "GB",
+      "HK",
+      "IE",
+      "IL",
+      "IN",
+      "NG",
+      "NZ",
+      "PH",
+      "SC",
+      "SG",
+      "US",
+      "ZA",
+      "ZM",
+      "ZW" 
+    } 
+  },
+  { code="eo", name="Esperanto", regions={} },
+  {
+    code="es",
+    name="Spanish",
+    regions={
+      "AR",
+      "BO",
+      "CL",
+      "CO",
+      "CR",
+      "CU",
+      "DO",
+      "EC",
+      "ES",
+      "GT",
+      "HN",
+      "MX",
+      "NI",
+      "PA",
+      "PE",
+      "PR",
+      "PY",
+      "SV",
+      "US",
+      "UY",
+      "VE" 
+    } 
+  },
+  { code="et", name="Estonian", regions={ "EE" } },
+  { code="eu", name="Basque", regions={ "ES" } },
+  { code="fa", name="Persian", regions={ "IR" } },
+  { code="ff", name="Fulah", regions={ "SN" } },
+  { code="fi", name="Finnish", regions={ "FI" } },
+  { code="fil", name="Filipino", regions={ "PH" } },
+  { code="fo", name="Faroese", regions={ "FO" } },
+  { code="fr", name="French", regions={ "BE",
"CA", "CH", "FR", "LU" } },
+  { code="fur", name="Friulian", regions={ "IT" } },
+  { code="fy", name="Western Frisian", regions={ "DE",
"NL" } },
+  { code="ga", name="Irish", regions={ "IE" } },
+  { code="gd", name="Scottish Gaelic", regions={ "GB" } },
+  { code="gez", name="Geez", regions={ "ER", "ET"
} },
+  { code="gl", name="Galician", regions={ "ES" } },
+  { code="gu", name="Gujarati", regions={ "IN" } },
+  { code="gv", name="Manx", regions={ "GB" } },
+  { code="ha", name="Hausa", regions={ "NG" } },
+  { code="hak", name="Hakka Chinese", regions={ "TW" } },
+  { code="he", name="Hebrew", regions={ "IL" } },
+  { code="hi", name="Hindi", regions={ "IN" } },
+  { code="hif", name="Fiji Hindi", regions={ "FJ" } },
+  { code="hne", name="Chhattisgarhi", regions={ "IN" } },
+  { code="hr", name="Croatian", regions={ "HR" } },
+  { code="hsb", name="Upper Sorbian", regions={ "DE" } },
+  { code="ht", name="Haitian Creole", regions={ "HT" } },
+  { code="hu", name="Hungarian", regions={ "HU" } },
+  { code="hy", name="Armenian", regions={ "AM" } },
+  { code="ia", name="Interlingua", regions={ "FR" } },
+  { code="id", name="Indonesian", regions={ "ID" } },
+  { code="ig", name="Igbo", regions={ "NG" } },
+  { code="ik", name="Inupiaq", regions={ "CA" } },
+  { code="is", name="Icelandic", regions={ "IS" } },
+  { code="it", name="Italian", regions={ "CH",
"IT" } },
+  { code="iu", name="Inuktitut", regions={ "CA" } },
+  { code="ja", name="Japanese", regions={ "JP" } },
+  { code="ka", name="Georgian", regions={ "GE" } },
+  { code="kab", name="Kabyle", regions={ "DZ" } },
+  { code="kk", name="Kazakh", regions={ "KZ" } },
+  { code="kl", name="Kalaallisut", regions={ "GL" } },
+  { code="km", name="Khmer", regions={ "KH" } },
+  { code="kn", name="Kannada", regions={ "IN" } },
+  { code="ko", name="Korean", regions={ "KR" } },
+  { code="kok", name="Konkani", regions={ "IN" } },
+  { code="ks", name="Kashmiri", regions={ "IN" } },
+  { code="ku", name="Kurdish", regions={ "TR" } },
+  { code="kw", name="Cornish", regions={ "GB" } },
+  { code="ky", name="Kyrgyz", regions={ "KG" } },
+  { code="lb", name="Luxembourgish", regions={ "LU" } },
+  { code="lg", name="Ganda", regions={ "UG" } },
+  { code="li", name="Limburgish", regions={ "BE",
"NL" } },
+  { code="lij", name="Ligurian", regions={ "IT" } },
+  { code="ln", name="Lingala", regions={ "CD" } },
+  { code="lo", name="Lao", regions={ "LA" } },
+  { code="lt", name="Lithuanian", regions={ "LT" } },
+  { code="lv", name="Latvian", regions={ "LV" } },
+  { code="lzh", name="Literary Chinese", regions={ "TW" }
},
+  { code="mag", name="Magahi", regions={ "IN" } },
+  { code="mai", name="Maithili", regions={ "IN",
"NP" } },
+  { code="mfe", name="Morisyen", regions={ "MU" } },
+  { code="mg", name="Malagasy", regions={ "MG" } },
+  { code="mhr", name="Meadow Mari", regions={ "RU" } },
+  { code="mi", name="Maori", regions={ "NZ" } },
+  { code="miq", name="Miskito", regions={ "NI" } },
+  { code="mjw", name="Karbi", regions={ "IN" } },
+  { code="mk", name="Macedonian", regions={ "MK" } },
+  { code="ml", name="Malayalam", regions={ "IN" } },
+  { code="mn", name="Mongolian", regions={ "MN" } },
+  { code="mni", name="Manipuri", regions={ "IN" } },
+  { code="mnw", name="Mon", regions={ "MM" } },
+  { code="mr", name="Marathi", regions={ "IN" } },
+  { code="ms", name="Malay", regions={ "MY" } },
+  { code="mt", name="Maltese", regions={ "MT" } },
+  { code="my", name="Burmese", regions={ "MM" } },
+  { code="nan", name="Min Nan Chinese", regions={ "TW" }
},
+  { code="nb", name="Norwegian Bokmål", regions={ "NO" }
},
+  { code="nds", name="Low German", regions={ "DE",
"NL" } },
+  { code="ne", name="Nepali", regions={ "NP" } },
+  { code="nhn", name="Tlaxcala-Puebla Nahuatl", regions={
"MX" } },
+  { code="niu", name="Niuean", regions={ "NU",
"NZ" } },
+  { code="nl", name="Dutch", regions={ "AW",
"BE", "NL" } },
+  { code="nn", name="Norwegian Nynorsk", regions={ "NO" }
},
+  { code="nr", name="South Ndebele", regions={ "ZA" } },
+  { code="nso", name="Northern Sotho", regions={ "ZA" } },
+  { code="oc", name="Occitan", regions={ "FR" } },
+  { code="om", name="Oromo", regions={ "ET", "KE"
} },
+  { code="or", name="Odia", regions={ "IN" } },
+  { code="os", name="Ossetic", regions={ "RU" } },
+  { code="pa", name="Punjabi", regions={ "IN",
"PK" } },
+  { code="pap", name="Papiamento", regions={ "AW",
"CW" } },
+  { code="pl", name="Polish", regions={ "PL" } },
+  { code="ps", name="Pashto", regions={ "AF" } },
+  { code="pt", name="Portuguese", regions={ "BR",
"PT" } },
+  { code="quz", name="Cusco Quechua", regions={ "PE" } },
+  { code="raj", name="Rajasthani", regions={ "IN" } },
+  { code="ro", name="Romanian", regions={ "RO" } },
+  { code="ru", name="Russian", regions={ "RU",
"UA" } },
+  { code="rw", name="Kinyarwanda", regions={ "RW" } },
+  { code="sa", name="Sanskrit", regions={ "IN" } },
+  { code="sah", name="Sakha", regions={ "RU" } },
+  { code="sat", name="Santali", regions={ "IN" } },
+  { code="sc", name="Sardinian", regions={ "IT" } },
+  { code="sd", name="Sindhi", regions={ "IN" } },
+  { code="se", name="Northern Sami", regions={ "NO" } },
+  { code="sgs", name="Samogitian", regions={ "LT" } },
+  { code="shn", name="Shan", regions={ "MM" } },
+  { code="shs", name="Shuswap", regions={ "CA" } },
+  { code="si", name="Sinhala", regions={ "LK" } },
+  { code="sid", name="Sidamo", regions={ "ET" } },
+  { code="sk", name="Slovak", regions={ "SK" } },
+  { code="sl", name="Slovenian", regions={ "SI" } },
+  { code="sm", name="Samoan", regions={ "WS" } },
+  { code="so", name="Somali", regions={ "DJ",
"ET", "KE", "SO" } },
+  { code="sq", name="Albanian", regions={ "AL",
"MK" } },
+  { code="sr", name="Serbian", regions={ "ME",
"RS" } },
+  { code="ss", name="Swati", regions={ "ZA" } },
+  { code="st", name="Southern Sotho", regions={ "ZA" } },
+  { code="sv", name="Swedish", regions={ "FI",
"SE" } },
+  { code="sw", name="Swahili", regions={ "KE",
"TZ" } },
+  { code="szl", name="Silesian", regions={ "PL" } },
+  { code="ta", name="Tamil", regions={ "IN", "LK"
} },
+  { code="tcy", name="Tulu", regions={ "IN" } },
+  { code="te", name="Telugu", regions={ "IN" } },
+  { code="tg", name="Tajik", regions={ "TJ" } },
+  { code="th", name="Thai", regions={ "TH" } },
+  { code="the", name="Chitwania Tharu", regions={ "NP" }
},
+  { code="ti", name="Tigrinya", regions={ "ER",
"ET" } },
+  { code="tig", name="Tigre", regions={ "ER" } },
+  { code="tk", name="Turkmen", regions={ "TM" } },
+  { code="tl", name="Tagalog", regions={ "PH" } },
+  { code="tn", name="Tswana", regions={ "ZA" } },
+  { code="to", name="Tongan", regions={ "TO" } },
+  { code="tpi", name="Tok Pisin", regions={ "PG" } },
+  { code="tr", name="Turkish", regions={ "CY",
"TR" } },
+  { code="ts", name="Tsonga", regions={ "ZA" } },
+  { code="tt", name="Tatar", regions={ "RU" } },
+  { code="ug", name="Uyghur", regions={ "CN" } },
+  { code="uk", name="Ukrainian", regions={ "UA" } },
+  { code="unm", name="Unami language", regions={ "US" } },
+  { code="ur", name="Urdu", regions={ "IN", "PK"
} },
+  { code="uz", name="Uzbek", regions={ "UZ" } },
+  { code="ve", name="Venda", regions={ "ZA" } },
+  { code="vi", name="Vietnamese", regions={ "VN" } },
+  { code="wa", name="Walloon", regions={ "BE" } },
+  { code="wae", name="Walser", regions={ "CH" } },
+  { code="wal", name="Wolaytta", regions={ "ET" } },
+  { code="wo", name="Wolof", regions={ "SN" } },
+  { code="xh", name="Xhosa", regions={ "ZA" } },
+  { code="yi", name="Yiddish", regions={ "US" } },
+  { code="yo", name="Yoruba", regions={ "NG" } },
+  { code="yue", name="Cantonese", regions={ "HK" } },
+  { code="yuw", name="Yau", regions={ "PG" } },
+  { code="zh", name="Mandarin Chinese", regions={ "CN",
"HK", "SG", "TW" } },
+  { code="zu", name="Zulu", regions={ "ZA" } } 
+}
+
+-- Prints a list of LANGUAGE "_" REGION pairs.  The output is expected to be
+-- identical to the output of parse-SUPPORTED.py.  Called from the %%prep section.
+function print_locale_pairs()
+   for i = 1, #locales do
+      local locale = locales[i]
+      if #locale.regions == 0 then
+	 print(locale.code .. "\n")
+      else
+	 for j = 1, #locale.regions do
+	    print(locale.code .. "_" .. locale.regions[j] .. "\n")
 	 end
       end
    end
-   -- Sort for determinism.
-   table.sort(languages)
-   for _, supples in pairs(supplements) do
-      table.sort(supplements)
-   end
 end
 
--- Compute the language names
-local langnames = {}
-local python3 = io.open('/usr/bin/python3', 'r')
-if python3 then
-   python3:close()
-   local args = table.concat(languages, ' ')
-   local file = io.popen(rpm.expand("%{SOURCE13}") .. ' ' .. args)
-   while true do
-       line = file:read()
-       if line == nil then break end
-       langnames[#langnames + 1] = line
-   end
-   file:close()
-else
-   for i = 1, #languages do
-      langnames[#langnames + 1] = languages[i]
-   end
-end
-
--- Compute the Supplements: list for a language, based on the regions.
-local function compute_supplements(lang)
+local function compute_supplements(locale)
+   local lang = locale.code
+   local regions = locale.regions
    result = "langpacks-core-" .. lang
-   regions = supplements[lang]
-   if regions ~= nil then
-      for i = 1, #regions do
-	 result = result .. " or langpacks-core-" .. lang .. "_" ..
regions[i]
-      end
+   for i = 1, #regions do
+      result = result .. " or langpacks-core-" .. lang .. "_" ..
regions[i]
    end
    return result
 end
 
 -- Emit the definition of a language pack package.
-local function lang_package(lang, langname)
-   local suppl = compute_supplements(lang)
+local function lang_package(locale)
+   local lang = locale.code
+   local langname = locale.name
+   local suppl = compute_supplements(locale)
    print(rpm.expand([[
 
 %package langpack-]]..lang..[[
@@ -528,8 +763,8 @@ to support the ]]..langname..[[ language in your applications.
 ]]))
 end
 
-for i = 1, #languages do
-   lang_package(languages[i], langnames[i])
+for i = 1, #locales do
+   lang_package(locales[i])
 end
 }
 
@@ -748,17 +983,16 @@ touch `find . -name configure`
 # Ensure *-kw.h files are current to prevent regenerating them.
 touch locale/programs/*-kw.h
 
-# Verify that our copy of localedata/SUPPORTED matches the glibc
-# version.
-#
-# The separate file copy is used by the Lua parser above.
-# Patches or new upstream versions may change the list of locales,
-# which changes the set of langpacks we need to build.  Verify the
-# differences then update the copy of SUPPORTED.  This approach has
-# two purposes: (a) avoid spurious changes to the set of langpacks,
-# and (b) the Lua snippet can use a fully patched-up version
-# of the localedata/SUPPORTED file.
-diff -u %{SOURCE11} localedata/SUPPORTED
+# Verify that the locales table in the spec file is compatible with
+# the locales listed in glibc's localedata/SUPPORTED.
+set +x
+echo '%{lua: print_locale_pairs()}' > localedata/SUPPORTED.spec
+set -x
+python3 %{SOURCE11} localedata/SUPPORTED > localedata/SUPPORTED.glibc
+diff -u \
+  --label "spec file" localedata/SUPPORTED.spec \
+  --label "glibc localedata/SUPPORTED" localedata/SUPPORTED.glibc
+rm localedata/SUPPORTED.spec localedata/SUPPORTED.glibc
 
 ##############################################################################
 # Build glibc...
diff --git a/parse-SUPPORTED.py b/parse-SUPPORTED.py
new file mode 100644
index 0000000..cf512de
--- /dev/null
+++ b/parse-SUPPORTED.py
@@ -0,0 +1,40 @@
+#!/usr/bin/python3
+#
+# This script turns localedata/SUPPORTED (whose path is passed as the
+# first argument) into a normalized list of LANGUAGE "_" REGION pairs.
+# (If there is no REGION defined, only LANGUAGE is used.)  The list
+# is written to standard output, with one element per line.
+
+import sys
+
+supported, = sys.argv[1:]
+
+# Pairs seen so far.  Used to suppress duplicates.
+seen = set()
+with open(supported) as inp:
+    for line in inp:
+        if line.startswith("#") or line == "SUPPORTED-LOCALES=\\\n":
+            # Comment or prefix.
+            continue
+        if not line.endswith(" \\\n"):
+            raise IOError("line without continuation: " + repr(line))
+        try:
+            slash = line.index("/")
+        except ValueError:
+            raise IOError("line without slash: " + repr(line))
+        spec = line[:slash]
+        for separator in ".@":
+            try:
+                # Strip charset, variant specifiers.
+                spec = spec[:spec.index(separator)]
+            except ValueError:
+                pass
+        seen.add(spec)
+
+# The C locale does not correspond to a language.
+seen.remove("C")
+
+# The glibc source file is not sorted.
+for spec in sorted(seen):
+    print(spec)
+print() # The Lua generator produces a trailing newline.

-- 
Red Hat GmbH, https://de.redhat.com/ , Registered seat: Grasbrunn,
Commercial register: Amtsgericht Muenchen, HRB 153243,
Managing Directors: Charles Cachera, Brian Klemm, Laurie Krebs, Michael O'Neill

    

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013