// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package language
// matchTest describes one group of language-matching cases that share a
// single list of supported tags.
//
// NOTE(review): the field meanings below are inferred from the table data and
// its inline comments; confirm against the code that consumes the table.
type matchTest struct {
	// comment is a short human-readable label for the group.
	comment string

	// supported is a comma-separated list of language tags; the data
	// suggests these are the tags the matcher may choose from. It may be
	// empty.
	supported string

	// test holds the individual cases. desired appears to be a
	// comma-separated, preference-ordered list of requested tags, and match
	// the tag expected as the result.
	test []struct{ match, desired string }
}

var matchTests = []matchTest{
|
||
|
{
|
||
|
"basics",
|
||
|
"fr, en-GB, en",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"en-GB", "en-GB"},
|
||
|
{"en", "en-US"},
|
||
|
{"fr", "fr-FR"},
|
||
|
{"fr", "ja-JP"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"script fallbacks",
|
||
|
"zh-CN, zh-TW, iw",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"zh-TW", "zh-Hant"},
|
||
|
{"zh-CN", "zh"},
|
||
|
{"zh-CN", "zh-Hans-CN"},
|
||
|
{"zh-TW", "zh-Hant-HK"},
|
||
|
{"iw", "he-IT"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"language-specific script fallbacks 1",
|
||
|
"en, sr, nl",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"sr", "sr-Latn"},
|
||
|
{"en", "sh"},
|
||
|
{"en", "hr"},
|
||
|
{"en", "bs"},
|
||
|
{"en", "nl-Cyrl"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"language-specific script fallbacks 2",
|
||
|
"en, sh",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"sh", "sr"},
|
||
|
{"sh", "sr-Cyrl"},
|
||
|
{"sh", "hr"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"both deprecated and not",
|
||
|
"fil, tl, iw, he",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"he", "he-IT"},
|
||
|
{"he", "he"},
|
||
|
{"iw", "iw"},
|
||
|
{"fil", "fil-IT"},
|
||
|
{"fil", "fil"},
|
||
|
{"tl", "tl"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"nearby languages",
|
||
|
"en, fil, ro, nn",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"fil", "tl"},
|
||
|
{"ro", "mo"},
|
||
|
{"nn", "nb"},
|
||
|
{"en", "ja"}, // make sure default works
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"nearby languages: Nynorsk to Bokmål",
|
||
|
"en, nb",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"nb", "nn"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"nearby languages: Danish does not match nn",
|
||
|
"en, nn",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"en", "da"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"nearby languages: Danish matches no",
|
||
|
"en, no",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"no", "da"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"nearby languages: Danish matches nb",
|
||
|
"en, nb",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"nb", "da"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"prefer matching languages over language variants.",
|
||
|
"nn, en-GB",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"en-GB", "no, en-US"},
|
||
|
{"en-GB", "nb, en-US"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"deprecated version is closer than same language with other differences",
|
||
|
"nl, he, en-GB",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"he", "iw, en-US"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"macro equivalent is closer than same language with other differences",
|
||
|
"nl, zh, en-GB, no",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"zh", "cmn, en-US"},
|
||
|
{"no", "nb, en-US"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"legacy equivalent is closer than same language with other differences",
|
||
|
"nl, fil, en-GB",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"fil", "tl, en-US"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"exact over equivalent",
|
||
|
"en, ro, mo, ro-MD",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"ro", "ro"},
|
||
|
{"mo", "mo"},
|
||
|
{"ro-MD", "ro-MD"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"maximization of legacy",
|
||
|
"sr-Cyrl, sr-Latn, ro, ro-MD",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"sr-Latn", "sh"},
|
||
|
{"ro-MD", "mo"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"empty",
|
||
|
"",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"und", "fr"},
|
||
|
{"und", "en"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"private use subtags",
|
||
|
"fr, en-GB, x-bork, es-ES, es-419",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"fr", "x-piglatin"},
|
||
|
{"x-bork", "x-bork"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"grandfathered codes",
|
||
|
"fr, i-klingon, en-Latn-US",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"en-Latn-US", "en-GB-oed"},
|
||
|
{"tlh", "i-klingon"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"exact match",
|
||
|
"fr, en-GB, ja, es-ES, es-MX",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"ja", "ja, de"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"simple variant match",
|
||
|
"fr, en-GB, ja, es-ES, es-MX",
|
||
|
[]struct{ match, desired string }{
|
||
|
// Intentionally avoiding a perfect-match or two candidates for variant matches.
|
||
|
{"en-GB", "de, en-US"},
|
||
|
// Fall back.
|
||
|
{"fr", "de, zh"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"best match for traditional Chinese",
|
||
|
// Scenario: An application that only supports Simplified Chinese (and some
|
||
|
// other languages), but does not support Traditional Chinese. zh-Hans-CN
|
||
|
// could be replaced with zh-CN, zh, or zh-Hans, it wouldn't make much of
|
||
|
// a difference.
|
||
|
"fr, zh-Hans-CN, en-US",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"zh-Hans-CN", "zh-TW"},
|
||
|
{"zh-Hans-CN", "zh-Hant"},
|
||
|
// One can avoid a zh-Hant to zh-Hans match by including a second language
|
||
|
// preference which is a better match.
|
||
|
{"en-US", "zh-TW, en"},
|
||
|
{"en-US", "zh-Hant-CN, en"},
|
||
|
{"zh-Hans-CN", "zh-Hans, en"},
|
||
|
},
|
||
|
},
|
||
|
// More specific region and script tie-breakers.
|
||
|
{
|
||
|
"more specific script should win in case regions are identical",
|
||
|
"af, af-Latn, af-Arab",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"af", "af"},
|
||
|
{"af", "af-ZA"},
|
||
|
{"af-Latn", "af-Latn-ZA"},
|
||
|
{"af-Latn", "af-Latn"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"more specific region should win",
|
||
|
"nl, nl-NL, nl-BE",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"nl", "nl"},
|
||
|
{"nl", "nl-Latn"},
|
||
|
{"nl-NL", "nl-Latn-NL"},
|
||
|
{"nl-NL", "nl-NL"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"more specific region wins over more specific script",
|
||
|
"nl, nl-Latn, nl-NL, nl-BE",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"nl", "nl"},
|
||
|
{"nl-Latn", "nl-Latn"},
|
||
|
{"nl-NL", "nl-NL"},
|
||
|
{"nl-NL", "nl-Latn-NL"},
|
||
|
},
|
||
|
},
|
||
|
// Region distance tie-breakers.
|
||
|
{
|
||
|
"region distance Portuguese",
|
||
|
"pt, pt-PT",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"pt-PT", "pt-ES"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"region distance French",
|
||
|
"en, fr, fr-CA, fr-CH",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"fr-CA", "fr-US"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"region distance German",
|
||
|
"de-AT, de-DE, de-CH",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"de-DE", "de"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"en-AU is closer to en-GB than to en (which is en-US)",
|
||
|
"en, en-GB, es-ES, es-419",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"en-GB", "en-AU"},
|
||
|
{"es-419", "es-MX"},
|
||
|
{"es-ES", "es-PT"},
|
||
|
},
|
||
|
},
|
||
|
// Test exceptions with "und".
|
||
|
// When the undefined language doesn't match anything in the list, return the default, as usual.
|
||
|
// max("und") = "en-Latn-US", and since matching is based on maximized tags, the undefined
|
||
|
// language would normally match English. But that would produce the counterintuitive results.
|
||
|
// Matching "und" to "it,en" would be "en" matching "en" to "it,und" would be "und".
|
||
|
// To avoid this max("und") is defined as "und"
|
||
|
{
|
||
|
"undefined",
|
||
|
"it, fr",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"it", "und"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"und does not match en",
|
||
|
"it, en",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"it", "und"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"undefined in priority list",
|
||
|
"it, und",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"und", "und"},
|
||
|
{"it", "en"},
|
||
|
},
|
||
|
},
|
||
|
// Undefined scripts and regions.
|
||
|
{
|
||
|
"undefined",
|
||
|
"it, fr, zh",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"fr", "und-FR"},
|
||
|
{"zh", "und-CN"},
|
||
|
{"zh", "und-Hans"},
|
||
|
{"zh", "und-Hant"},
|
||
|
{"it", "und-Latn"},
|
||
|
},
|
||
|
},
|
||
|
// Early termination conditions: do not consider all desired strings if
|
||
|
// a match is good enough.
|
||
|
{
|
||
|
"match on maximized tag",
|
||
|
"fr, en-GB, ja, es-ES, es-MX",
|
||
|
[]struct{ match, desired string }{
|
||
|
// ja-JP matches ja on likely subtags, and it's listed first,
|
||
|
// thus it wins over the second preference en-GB.
|
||
|
{"ja", "ja-JP, en-GB"},
|
||
|
{"ja", "ja-Jpan-JP, en-GB"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"pick best maximized tag",
|
||
|
"ja, ja-Jpan-US, ja-JP, en, ru",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"ja", "ja-Jpan, ru"},
|
||
|
{"ja-JP", "ja-JP, ru"},
|
||
|
{"ja-Jpan-US", "ja-US, ru"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"termination: pick best maximized match",
|
||
|
"ja, ja-Jpan, ja-JP, en, ru",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"ja-JP", "ja-Jpan-JP, ru"},
|
||
|
{"ja-Jpan", "ja-Jpan, ru"},
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
"no match on maximized",
|
||
|
"en, de, fr, ja",
|
||
|
[]struct{ match, desired string }{
|
||
|
// de maximizes to de-DE.
|
||
|
// Pick the exact match for the secondary language instead.
|
||
|
{"fr", "de-CH, fr"},
|
||
|
},
|
||
|
},
|
||
|
|
||
|
// Test that the CLDR parent relations are correctly preserved by the matcher.
|
||
|
// These matches may change for different CLDR versions.
|
||
|
{
|
||
|
"parent relation preserved",
|
||
|
"en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"en-GB", "en-150"},
|
||
|
{"en-GB", "en-AU"},
|
||
|
{"en-GB", "en-BE"},
|
||
|
{"en-GB", "en-GG"},
|
||
|
{"en-GB", "en-GI"},
|
||
|
{"en-GB", "en-HK"},
|
||
|
{"en-GB", "en-IE"},
|
||
|
{"en-GB", "en-IM"},
|
||
|
{"en-GB", "en-IN"},
|
||
|
{"en-GB", "en-JE"},
|
||
|
{"en-GB", "en-MT"},
|
||
|
{"en-GB", "en-NZ"},
|
||
|
{"en-GB", "en-PK"},
|
||
|
{"en-GB", "en-SG"},
|
||
|
{"en-GB", "en-DE"},
|
||
|
{"en-GB", "en-MT"},
|
||
|
{"es-419", "es-AR"},
|
||
|
{"es-419", "es-BO"},
|
||
|
{"es-419", "es-CL"},
|
||
|
{"es-419", "es-CO"},
|
||
|
{"es-419", "es-CR"},
|
||
|
{"es-419", "es-CU"},
|
||
|
{"es-419", "es-DO"},
|
||
|
{"es-419", "es-EC"},
|
||
|
{"es-419", "es-GT"},
|
||
|
{"es-419", "es-HN"},
|
||
|
{"es-419", "es-MX"},
|
||
|
{"es-419", "es-NI"},
|
||
|
{"es-419", "es-PA"},
|
||
|
{"es-419", "es-PE"},
|
||
|
{"es-419", "es-PR"},
|
||
|
{"es-419", "es-PY"},
|
||
|
{"es-419", "es-SV"},
|
||
|
{"es-419", "es-US"},
|
||
|
{"es-419", "es-UY"},
|
||
|
{"es-419", "es-VE"},
|
||
|
{"pt-PT", "pt-AO"},
|
||
|
{"pt-PT", "pt-CV"},
|
||
|
{"pt-PT", "pt-GW"},
|
||
|
{"pt-PT", "pt-MO"},
|
||
|
{"pt-PT", "pt-MZ"},
|
||
|
{"pt-PT", "pt-ST"},
|
||
|
{"pt-PT", "pt-TL"},
|
||
|
// TODO for CLDR 24+
|
||
|
// - en-001
|
||
|
// - {"zh-Hant-HK", "zh-Hant-MO"},
|
||
|
},
|
||
|
},
|
||
|
// Options and variants are inherited from user-defined settings.
|
||
|
{
|
||
|
"preserve Unicode extension",
|
||
|
"en, de, sl-nedis",
|
||
|
[]struct{ match, desired string }{
|
||
|
{"de-u-co-phonebk", "de-FR-u-co-phonebk"},
|
||
|
{"sl-nedis-u-cu-eur", "sl-nedis-u-cu-eur"},
|
||
|
{"sl-nedis-u-cu-eur", "sl-u-cu-eur"},
|
||
|
{"sl-nedis-u-cu-eur", "sl-HR-nedis-u-cu-eur"},
|
||
|
},
|
||
|
},
|
||
|
}
|