4
0
mirror of https://github.com/cwinfo/matterbridge.git synced 2025-07-03 16:37:44 +00:00

Add vendor github.com/dfordsoft/golib/ic

This commit is contained in:
Wim
2018-05-11 21:54:32 +02:00
parent 406a54b597
commit bf0b9959d1
60 changed files with 36737 additions and 28431 deletions

View File

@ -33,32 +33,32 @@ var (
ISO8859_8I encoding.Encoding = &iso8859_8I
iso8859_6E = internal.Encoding{
ISO8859_6,
"ISO-8859-6E",
identifier.ISO88596E,
Encoding: ISO8859_6,
Name: "ISO-8859-6E",
MIB: identifier.ISO88596E,
}
iso8859_6I = internal.Encoding{
ISO8859_6,
"ISO-8859-6I",
identifier.ISO88596I,
Encoding: ISO8859_6,
Name: "ISO-8859-6I",
MIB: identifier.ISO88596I,
}
iso8859_8E = internal.Encoding{
ISO8859_8,
"ISO-8859-8E",
identifier.ISO88598E,
Encoding: ISO8859_8,
Name: "ISO-8859-8E",
MIB: identifier.ISO88598E,
}
iso8859_8I = internal.Encoding{
ISO8859_8,
"ISO-8859-8I",
identifier.ISO88598I,
Encoding: ISO8859_8,
Name: "ISO-8859-8I",
MIB: identifier.ISO88598I,
}
)
// All is a list of all defined encodings in this package.
var All = listAll
var All []encoding.Encoding = listAll
// TODO: implement these encodings, in order of importance.
// ASCII, ISO8859_1: Rather common. Close to Windows 1252.
@ -70,8 +70,8 @@ type utf8Enc struct {
data [3]byte
}
// charmap describes an 8-bit character set encoding.
type charmap struct {
// Charmap is an 8-bit character set encoding.
type Charmap struct {
// name is the encoding's name.
name string
// mib is the encoding type of this encoder.
@ -79,7 +79,7 @@ type charmap struct {
// asciiSuperset states whether the encoding is a superset of ASCII.
asciiSuperset bool
// low is the lower bound of the encoded byte for a non-ASCII rune. If
// charmap.asciiSuperset is true then this will be 0x80, otherwise 0x00.
// Charmap.asciiSuperset is true then this will be 0x80, otherwise 0x00.
low uint8
// replacement is the encoded replacement character.
replacement byte
@ -91,26 +91,30 @@ type charmap struct {
encode [256]uint32
}
func (m *charmap) NewDecoder() *encoding.Decoder {
// NewDecoder implements the encoding.Encoding interface.
func (m *Charmap) NewDecoder() *encoding.Decoder {
return &encoding.Decoder{Transformer: charmapDecoder{charmap: m}}
}
func (m *charmap) NewEncoder() *encoding.Encoder {
// NewEncoder implements the encoding.Encoding interface.
func (m *Charmap) NewEncoder() *encoding.Encoder {
return &encoding.Encoder{Transformer: charmapEncoder{charmap: m}}
}
func (m *charmap) String() string {
// String returns the Charmap's name.
func (m *Charmap) String() string {
return m.name
}
func (m *charmap) ID() (mib identifier.MIB, other string) {
// ID implements an internal interface.
func (m *Charmap) ID() (mib identifier.MIB, other string) {
return m.mib, ""
}
// charmapDecoder implements transform.Transformer by decoding to UTF-8.
type charmapDecoder struct {
transform.NopResetter
charmap *charmap
charmap *Charmap
}
func (m charmapDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
@ -142,10 +146,22 @@ func (m charmapDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int,
return nDst, nSrc, err
}
// DecodeByte returns the rune that the Charmap assigns to the byte b. The
// decode table stores each rune as its UTF-8 encoding of one, two or three
// bytes; the rune is reassembled from those bytes here.
func (m *Charmap) DecodeByte(b byte) rune {
	enc := &m.decode[b]
	if enc.len == 1 {
		// Single byte: the byte value is the rune.
		return rune(enc.data[0])
	}
	if enc.len == 2 {
		// Two bytes: 5 payload bits followed by 6 payload bits.
		hi := rune(enc.data[0] & 0x1f)
		lo := rune(enc.data[1] & 0x3f)
		return hi<<6 | lo
	}
	// Any other length is treated as a three-byte sequence: 4 + 6 + 6 bits.
	hi := rune(enc.data[0] & 0x0f)
	mid := rune(enc.data[1] & 0x3f)
	lo := rune(enc.data[2] & 0x3f)
	return hi<<12 | mid<<6 | lo
}
// charmapEncoder implements transform.Transformer by encoding from UTF-8.
type charmapEncoder struct {
transform.NopResetter
charmap *charmap
charmap *Charmap
}
func (m charmapEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
@ -207,3 +223,27 @@ loop:
}
return nDst, nSrc, err
}
// EncodeRune returns the Charmap's byte encoding of the rune r. ok reports
// whether r is in the Charmap's repertoire. If it is not, b is the Charmap's
// replacement byte, which is often the ASCII substitute character '\x1a'.
func (m *Charmap) EncodeRune(r rune) (b byte, ok bool) {
	if m.asciiSuperset && r < utf8.RuneSelf {
		return byte(r), true
	}
	// Binary search over encode[m.low:0x100]. Each entry packs the rune in its
	// low 24 bits and the encoded byte in its high 8 bits.
	lo, hi := int(m.low), 0x100
	for lo < hi {
		mid := (lo + hi) / 2
		entry := m.encode[mid]
		switch candidate := rune(entry & (1<<24 - 1)); {
		case candidate < r:
			lo = mid + 1
		case candidate > r:
			hi = mid
		default:
			return byte(entry >> 24), true
		}
	}
	return m.replacement, false
}

View File

@ -494,7 +494,7 @@ func main() {
if e.comment != "" {
printf("//\n// %s\n", e.comment)
}
printf("var %s encoding.Encoding = &%s\n\nvar %s = charmap{\nname: %q,\n",
printf("var %s *Charmap = &%s\n\nvar %s = Charmap{\nname: %q,\n",
varName, lowerVarName, lowerVarName, e.name)
if mibs[e.mib] {
log.Fatalf("MIB type %q declared multiple times.", e.mib)
@ -540,7 +540,7 @@ func main() {
}
printf("},\n}\n")
// Add an estimate of the size of a single charmap{} struct value, which
// Add an estimate of the size of a single Charmap{} struct value, which
// includes two 256 elem arrays of 4 bytes and some extra fields, which
// align to 3 uint64s on 64-bit architectures.
w.Size += 2*4*256 + 3*8

View File

@ -1,4 +1,4 @@
// This file was generated by go generate; DO NOT EDIT
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package charmap
@ -8,9 +8,9 @@ import (
)
// CodePage037 is the IBM Code Page 037 encoding.
var CodePage037 encoding.Encoding = &codePage037
var CodePage037 *Charmap = &codePage037
var codePage037 = charmap{
var codePage037 = Charmap{
name: "IBM Code Page 037",
mib: identifier.IBM037,
asciiSuperset: false,
@ -183,9 +183,9 @@ var codePage037 = charmap{
}
// CodePage437 is the IBM Code Page 437 encoding.
var CodePage437 encoding.Encoding = &codePage437
var CodePage437 *Charmap = &codePage437
var codePage437 = charmap{
var codePage437 = Charmap{
name: "IBM Code Page 437",
mib: identifier.PC8CodePage437,
asciiSuperset: true,
@ -358,9 +358,9 @@ var codePage437 = charmap{
}
// CodePage850 is the IBM Code Page 850 encoding.
var CodePage850 encoding.Encoding = &codePage850
var CodePage850 *Charmap = &codePage850
var codePage850 = charmap{
var codePage850 = Charmap{
name: "IBM Code Page 850",
mib: identifier.PC850Multilingual,
asciiSuperset: true,
@ -533,9 +533,9 @@ var codePage850 = charmap{
}
// CodePage852 is the IBM Code Page 852 encoding.
var CodePage852 encoding.Encoding = &codePage852
var CodePage852 *Charmap = &codePage852
var codePage852 = charmap{
var codePage852 = Charmap{
name: "IBM Code Page 852",
mib: identifier.PCp852,
asciiSuperset: true,
@ -708,9 +708,9 @@ var codePage852 = charmap{
}
// CodePage855 is the IBM Code Page 855 encoding.
var CodePage855 encoding.Encoding = &codePage855
var CodePage855 *Charmap = &codePage855
var codePage855 = charmap{
var codePage855 = Charmap{
name: "IBM Code Page 855",
mib: identifier.IBM855,
asciiSuperset: true,
@ -883,9 +883,9 @@ var codePage855 = charmap{
}
// CodePage858 is the Windows Code Page 858 encoding.
var CodePage858 encoding.Encoding = &codePage858
var CodePage858 *Charmap = &codePage858
var codePage858 = charmap{
var codePage858 = Charmap{
name: "Windows Code Page 858",
mib: identifier.IBM00858,
asciiSuperset: true,
@ -1058,9 +1058,9 @@ var codePage858 = charmap{
}
// CodePage860 is the IBM Code Page 860 encoding.
var CodePage860 encoding.Encoding = &codePage860
var CodePage860 *Charmap = &codePage860
var codePage860 = charmap{
var codePage860 = Charmap{
name: "IBM Code Page 860",
mib: identifier.IBM860,
asciiSuperset: true,
@ -1233,9 +1233,9 @@ var codePage860 = charmap{
}
// CodePage862 is the IBM Code Page 862 encoding.
var CodePage862 encoding.Encoding = &codePage862
var CodePage862 *Charmap = &codePage862
var codePage862 = charmap{
var codePage862 = Charmap{
name: "IBM Code Page 862",
mib: identifier.PC862LatinHebrew,
asciiSuperset: true,
@ -1408,9 +1408,9 @@ var codePage862 = charmap{
}
// CodePage863 is the IBM Code Page 863 encoding.
var CodePage863 encoding.Encoding = &codePage863
var CodePage863 *Charmap = &codePage863
var codePage863 = charmap{
var codePage863 = Charmap{
name: "IBM Code Page 863",
mib: identifier.IBM863,
asciiSuperset: true,
@ -1583,9 +1583,9 @@ var codePage863 = charmap{
}
// CodePage865 is the IBM Code Page 865 encoding.
var CodePage865 encoding.Encoding = &codePage865
var CodePage865 *Charmap = &codePage865
var codePage865 = charmap{
var codePage865 = Charmap{
name: "IBM Code Page 865",
mib: identifier.IBM865,
asciiSuperset: true,
@ -1758,9 +1758,9 @@ var codePage865 = charmap{
}
// CodePage866 is the IBM Code Page 866 encoding.
var CodePage866 encoding.Encoding = &codePage866
var CodePage866 *Charmap = &codePage866
var codePage866 = charmap{
var codePage866 = Charmap{
name: "IBM Code Page 866",
mib: identifier.IBM866,
asciiSuperset: true,
@ -1933,9 +1933,9 @@ var codePage866 = charmap{
}
// CodePage1047 is the IBM Code Page 1047 encoding.
var CodePage1047 encoding.Encoding = &codePage1047
var CodePage1047 *Charmap = &codePage1047
var codePage1047 = charmap{
var codePage1047 = Charmap{
name: "IBM Code Page 1047",
mib: identifier.IBM1047,
asciiSuperset: false,
@ -2108,9 +2108,9 @@ var codePage1047 = charmap{
}
// CodePage1140 is the IBM Code Page 1140 encoding.
var CodePage1140 encoding.Encoding = &codePage1140
var CodePage1140 *Charmap = &codePage1140
var codePage1140 = charmap{
var codePage1140 = Charmap{
name: "IBM Code Page 1140",
mib: identifier.IBM01140,
asciiSuperset: false,
@ -2283,9 +2283,9 @@ var codePage1140 = charmap{
}
// ISO8859_1 is the ISO 8859-1 encoding.
var ISO8859_1 encoding.Encoding = &iso8859_1
var ISO8859_1 *Charmap = &iso8859_1
var iso8859_1 = charmap{
var iso8859_1 = Charmap{
name: "ISO 8859-1",
mib: identifier.ISOLatin1,
asciiSuperset: true,
@ -2458,9 +2458,9 @@ var iso8859_1 = charmap{
}
// ISO8859_2 is the ISO 8859-2 encoding.
var ISO8859_2 encoding.Encoding = &iso8859_2
var ISO8859_2 *Charmap = &iso8859_2
var iso8859_2 = charmap{
var iso8859_2 = Charmap{
name: "ISO 8859-2",
mib: identifier.ISOLatin2,
asciiSuperset: true,
@ -2633,9 +2633,9 @@ var iso8859_2 = charmap{
}
// ISO8859_3 is the ISO 8859-3 encoding.
var ISO8859_3 encoding.Encoding = &iso8859_3
var ISO8859_3 *Charmap = &iso8859_3
var iso8859_3 = charmap{
var iso8859_3 = Charmap{
name: "ISO 8859-3",
mib: identifier.ISOLatin3,
asciiSuperset: true,
@ -2808,9 +2808,9 @@ var iso8859_3 = charmap{
}
// ISO8859_4 is the ISO 8859-4 encoding.
var ISO8859_4 encoding.Encoding = &iso8859_4
var ISO8859_4 *Charmap = &iso8859_4
var iso8859_4 = charmap{
var iso8859_4 = Charmap{
name: "ISO 8859-4",
mib: identifier.ISOLatin4,
asciiSuperset: true,
@ -2983,9 +2983,9 @@ var iso8859_4 = charmap{
}
// ISO8859_5 is the ISO 8859-5 encoding.
var ISO8859_5 encoding.Encoding = &iso8859_5
var ISO8859_5 *Charmap = &iso8859_5
var iso8859_5 = charmap{
var iso8859_5 = Charmap{
name: "ISO 8859-5",
mib: identifier.ISOLatinCyrillic,
asciiSuperset: true,
@ -3158,9 +3158,9 @@ var iso8859_5 = charmap{
}
// ISO8859_6 is the ISO 8859-6 encoding.
var ISO8859_6 encoding.Encoding = &iso8859_6
var ISO8859_6 *Charmap = &iso8859_6
var iso8859_6 = charmap{
var iso8859_6 = Charmap{
name: "ISO 8859-6",
mib: identifier.ISOLatinArabic,
asciiSuperset: true,
@ -3333,9 +3333,9 @@ var iso8859_6 = charmap{
}
// ISO8859_7 is the ISO 8859-7 encoding.
var ISO8859_7 encoding.Encoding = &iso8859_7
var ISO8859_7 *Charmap = &iso8859_7
var iso8859_7 = charmap{
var iso8859_7 = Charmap{
name: "ISO 8859-7",
mib: identifier.ISOLatinGreek,
asciiSuperset: true,
@ -3508,9 +3508,9 @@ var iso8859_7 = charmap{
}
// ISO8859_8 is the ISO 8859-8 encoding.
var ISO8859_8 encoding.Encoding = &iso8859_8
var ISO8859_8 *Charmap = &iso8859_8
var iso8859_8 = charmap{
var iso8859_8 = Charmap{
name: "ISO 8859-8",
mib: identifier.ISOLatinHebrew,
asciiSuperset: true,
@ -3683,9 +3683,9 @@ var iso8859_8 = charmap{
}
// ISO8859_9 is the ISO 8859-9 encoding.
var ISO8859_9 encoding.Encoding = &iso8859_9
var ISO8859_9 *Charmap = &iso8859_9
var iso8859_9 = charmap{
var iso8859_9 = Charmap{
name: "ISO 8859-9",
mib: identifier.ISOLatin5,
asciiSuperset: true,
@ -3858,9 +3858,9 @@ var iso8859_9 = charmap{
}
// ISO8859_10 is the ISO 8859-10 encoding.
var ISO8859_10 encoding.Encoding = &iso8859_10
var ISO8859_10 *Charmap = &iso8859_10
var iso8859_10 = charmap{
var iso8859_10 = Charmap{
name: "ISO 8859-10",
mib: identifier.ISOLatin6,
asciiSuperset: true,
@ -4033,9 +4033,9 @@ var iso8859_10 = charmap{
}
// ISO8859_13 is the ISO 8859-13 encoding.
var ISO8859_13 encoding.Encoding = &iso8859_13
var ISO8859_13 *Charmap = &iso8859_13
var iso8859_13 = charmap{
var iso8859_13 = Charmap{
name: "ISO 8859-13",
mib: identifier.ISO885913,
asciiSuperset: true,
@ -4208,9 +4208,9 @@ var iso8859_13 = charmap{
}
// ISO8859_14 is the ISO 8859-14 encoding.
var ISO8859_14 encoding.Encoding = &iso8859_14
var ISO8859_14 *Charmap = &iso8859_14
var iso8859_14 = charmap{
var iso8859_14 = Charmap{
name: "ISO 8859-14",
mib: identifier.ISO885914,
asciiSuperset: true,
@ -4383,9 +4383,9 @@ var iso8859_14 = charmap{
}
// ISO8859_15 is the ISO 8859-15 encoding.
var ISO8859_15 encoding.Encoding = &iso8859_15
var ISO8859_15 *Charmap = &iso8859_15
var iso8859_15 = charmap{
var iso8859_15 = Charmap{
name: "ISO 8859-15",
mib: identifier.ISO885915,
asciiSuperset: true,
@ -4558,9 +4558,9 @@ var iso8859_15 = charmap{
}
// ISO8859_16 is the ISO 8859-16 encoding.
var ISO8859_16 encoding.Encoding = &iso8859_16
var ISO8859_16 *Charmap = &iso8859_16
var iso8859_16 = charmap{
var iso8859_16 = Charmap{
name: "ISO 8859-16",
mib: identifier.ISO885916,
asciiSuperset: true,
@ -4733,9 +4733,9 @@ var iso8859_16 = charmap{
}
// KOI8R is the KOI8-R encoding.
var KOI8R encoding.Encoding = &koi8R
var KOI8R *Charmap = &koi8R
var koi8R = charmap{
var koi8R = Charmap{
name: "KOI8-R",
mib: identifier.KOI8R,
asciiSuperset: true,
@ -4908,9 +4908,9 @@ var koi8R = charmap{
}
// KOI8U is the KOI8-U encoding.
var KOI8U encoding.Encoding = &koi8U
var KOI8U *Charmap = &koi8U
var koi8U = charmap{
var koi8U = Charmap{
name: "KOI8-U",
mib: identifier.KOI8U,
asciiSuperset: true,
@ -5083,9 +5083,9 @@ var koi8U = charmap{
}
// Macintosh is the Macintosh encoding.
var Macintosh encoding.Encoding = &macintosh
var Macintosh *Charmap = &macintosh
var macintosh = charmap{
var macintosh = Charmap{
name: "Macintosh",
mib: identifier.Macintosh,
asciiSuperset: true,
@ -5258,9 +5258,9 @@ var macintosh = charmap{
}
// MacintoshCyrillic is the Macintosh Cyrillic encoding.
var MacintoshCyrillic encoding.Encoding = &macintoshCyrillic
var MacintoshCyrillic *Charmap = &macintoshCyrillic
var macintoshCyrillic = charmap{
var macintoshCyrillic = Charmap{
name: "Macintosh Cyrillic",
mib: identifier.MacintoshCyrillic,
asciiSuperset: true,
@ -5433,9 +5433,9 @@ var macintoshCyrillic = charmap{
}
// Windows874 is the Windows 874 encoding.
var Windows874 encoding.Encoding = &windows874
var Windows874 *Charmap = &windows874
var windows874 = charmap{
var windows874 = Charmap{
name: "Windows 874",
mib: identifier.Windows874,
asciiSuperset: true,
@ -5608,9 +5608,9 @@ var windows874 = charmap{
}
// Windows1250 is the Windows 1250 encoding.
var Windows1250 encoding.Encoding = &windows1250
var Windows1250 *Charmap = &windows1250
var windows1250 = charmap{
var windows1250 = Charmap{
name: "Windows 1250",
mib: identifier.Windows1250,
asciiSuperset: true,
@ -5783,9 +5783,9 @@ var windows1250 = charmap{
}
// Windows1251 is the Windows 1251 encoding.
var Windows1251 encoding.Encoding = &windows1251
var Windows1251 *Charmap = &windows1251
var windows1251 = charmap{
var windows1251 = Charmap{
name: "Windows 1251",
mib: identifier.Windows1251,
asciiSuperset: true,
@ -5958,9 +5958,9 @@ var windows1251 = charmap{
}
// Windows1252 is the Windows 1252 encoding.
var Windows1252 encoding.Encoding = &windows1252
var Windows1252 *Charmap = &windows1252
var windows1252 = charmap{
var windows1252 = Charmap{
name: "Windows 1252",
mib: identifier.Windows1252,
asciiSuperset: true,
@ -6133,9 +6133,9 @@ var windows1252 = charmap{
}
// Windows1253 is the Windows 1253 encoding.
var Windows1253 encoding.Encoding = &windows1253
var Windows1253 *Charmap = &windows1253
var windows1253 = charmap{
var windows1253 = Charmap{
name: "Windows 1253",
mib: identifier.Windows1253,
asciiSuperset: true,
@ -6308,9 +6308,9 @@ var windows1253 = charmap{
}
// Windows1254 is the Windows 1254 encoding.
var Windows1254 encoding.Encoding = &windows1254
var Windows1254 *Charmap = &windows1254
var windows1254 = charmap{
var windows1254 = Charmap{
name: "Windows 1254",
mib: identifier.Windows1254,
asciiSuperset: true,
@ -6483,9 +6483,9 @@ var windows1254 = charmap{
}
// Windows1255 is the Windows 1255 encoding.
var Windows1255 encoding.Encoding = &windows1255
var Windows1255 *Charmap = &windows1255
var windows1255 = charmap{
var windows1255 = Charmap{
name: "Windows 1255",
mib: identifier.Windows1255,
asciiSuperset: true,
@ -6593,7 +6593,7 @@ var windows1255 = charmap{
{2, [3]byte{0xd6, 0xb4, 0x00}}, {2, [3]byte{0xd6, 0xb5, 0x00}},
{2, [3]byte{0xd6, 0xb6, 0x00}}, {2, [3]byte{0xd6, 0xb7, 0x00}},
{2, [3]byte{0xd6, 0xb8, 0x00}}, {2, [3]byte{0xd6, 0xb9, 0x00}},
{3, [3]byte{0xef, 0xbf, 0xbd}}, {2, [3]byte{0xd6, 0xbb, 0x00}},
{2, [3]byte{0xd6, 0xba, 0x00}}, {2, [3]byte{0xd6, 0xbb, 0x00}},
{2, [3]byte{0xd6, 0xbc, 0x00}}, {2, [3]byte{0xd6, 0xbd, 0x00}},
{2, [3]byte{0xd6, 0xbe, 0x00}}, {2, [3]byte{0xd6, 0xbf, 0x00}},
{2, [3]byte{0xd7, 0x80, 0x00}}, {2, [3]byte{0xd7, 0x81, 0x00}},
@ -6643,24 +6643,24 @@ var windows1255 = charmap{
0xb20000b2, 0xb30000b3, 0xb40000b4, 0xb50000b5, 0xb60000b6, 0xb70000b7, 0xb80000b8, 0xb90000b9,
0xbb0000bb, 0xbc0000bc, 0xbd0000bd, 0xbe0000be, 0xbf0000bf, 0xaa0000d7, 0xba0000f7, 0x83000192,
0x880002c6, 0x980002dc, 0xc00005b0, 0xc10005b1, 0xc20005b2, 0xc30005b3, 0xc40005b4, 0xc50005b5,
0xc60005b6, 0xc70005b7, 0xc80005b8, 0xc90005b9, 0xcb0005bb, 0xcc0005bc, 0xcd0005bd, 0xce0005be,
0xcf0005bf, 0xd00005c0, 0xd10005c1, 0xd20005c2, 0xd30005c3, 0xe00005d0, 0xe10005d1, 0xe20005d2,
0xe30005d3, 0xe40005d4, 0xe50005d5, 0xe60005d6, 0xe70005d7, 0xe80005d8, 0xe90005d9, 0xea0005da,
0xeb0005db, 0xec0005dc, 0xed0005dd, 0xee0005de, 0xef0005df, 0xf00005e0, 0xf10005e1, 0xf20005e2,
0xf30005e3, 0xf40005e4, 0xf50005e5, 0xf60005e6, 0xf70005e7, 0xf80005e8, 0xf90005e9, 0xfa0005ea,
0xd40005f0, 0xd50005f1, 0xd60005f2, 0xd70005f3, 0xd80005f4, 0xfd00200e, 0xfe00200f, 0x96002013,
0x97002014, 0x91002018, 0x92002019, 0x8200201a, 0x9300201c, 0x9400201d, 0x8400201e, 0x86002020,
0x87002021, 0x95002022, 0x85002026, 0x89002030, 0x8b002039, 0x9b00203a, 0xa40020aa, 0x800020ac,
0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122,
0xc60005b6, 0xc70005b7, 0xc80005b8, 0xc90005b9, 0xca0005ba, 0xcb0005bb, 0xcc0005bc, 0xcd0005bd,
0xce0005be, 0xcf0005bf, 0xd00005c0, 0xd10005c1, 0xd20005c2, 0xd30005c3, 0xe00005d0, 0xe10005d1,
0xe20005d2, 0xe30005d3, 0xe40005d4, 0xe50005d5, 0xe60005d6, 0xe70005d7, 0xe80005d8, 0xe90005d9,
0xea0005da, 0xeb0005db, 0xec0005dc, 0xed0005dd, 0xee0005de, 0xef0005df, 0xf00005e0, 0xf10005e1,
0xf20005e2, 0xf30005e3, 0xf40005e4, 0xf50005e5, 0xf60005e6, 0xf70005e7, 0xf80005e8, 0xf90005e9,
0xfa0005ea, 0xd40005f0, 0xd50005f1, 0xd60005f2, 0xd70005f3, 0xd80005f4, 0xfd00200e, 0xfe00200f,
0x96002013, 0x97002014, 0x91002018, 0x92002019, 0x8200201a, 0x9300201c, 0x9400201d, 0x8400201e,
0x86002020, 0x87002021, 0x95002022, 0x85002026, 0x89002030, 0x8b002039, 0x9b00203a, 0xa40020aa,
0x800020ac, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122,
0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122,
0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122,
},
}
// Windows1256 is the Windows 1256 encoding.
var Windows1256 encoding.Encoding = &windows1256
var Windows1256 *Charmap = &windows1256
var windows1256 = charmap{
var windows1256 = Charmap{
name: "Windows 1256",
mib: identifier.Windows1256,
asciiSuperset: true,
@ -6833,9 +6833,9 @@ var windows1256 = charmap{
}
// Windows1257 is the Windows 1257 encoding.
var Windows1257 encoding.Encoding = &windows1257
var Windows1257 *Charmap = &windows1257
var windows1257 = charmap{
var windows1257 = Charmap{
name: "Windows 1257",
mib: identifier.Windows1257,
asciiSuperset: true,
@ -7008,9 +7008,9 @@ var windows1257 = charmap{
}
// Windows1258 is the Windows 1258 encoding.
var Windows1258 encoding.Encoding = &windows1258
var Windows1258 *Charmap = &windows1258
var windows1258 = charmap{
var windows1258 = Charmap{
name: "Windows 1258",
mib: identifier.Windows1258,
asciiSuperset: true,
@ -7185,9 +7185,9 @@ var windows1258 = charmap{
// XUserDefined is the X-User-Defined encoding.
//
// It is defined at http://encoding.spec.whatwg.org/#x-user-defined
var XUserDefined encoding.Encoding = &xUserDefined
var XUserDefined *Charmap = &xUserDefined
var xUserDefined = charmap{
var xUserDefined = Charmap{
name: "X-User-Defined",
mib: identifier.XUserDefined,
asciiSuperset: true,

View File

@ -133,7 +133,10 @@ var consts = map[string]string{
// locales is taken from
// https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm.
var locales = []struct{ tag, name string }{
{"und", "windows-1252"}, // The default value.
// The default value. Explicitly state latin to benefit from the exact
// script option, while still making 1252 the default encoding for languages
// written in Latin script.
{"und_Latn", "windows-1252"},
{"ar", "windows-1256"},
{"ba", "windows-1251"},
{"be", "windows-1251"},

View File

@ -50,7 +50,7 @@ func LanguageDefault(tag language.Tag) string {
for _, t := range strings.Split(locales, " ") {
tags = append(tags, language.MustParse(t))
}
matcher = language.NewMatcher(tags)
matcher = language.NewMatcher(tags, language.PreferSameScript(true))
})
_, i, _ := matcher.Match(tag)
return canonical[localeMap[i]] // Default is Windows-1252.

View File

@ -1,4 +1,4 @@
// This file was generated by go generate; DO NOT EDIT
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package htmlindex
@ -313,7 +313,7 @@ var nameMap = map[string]htmlEncoding{
}
var localeMap = []htmlEncoding{
windows1252, // und
windows1252, // und_Latn
windows1256, // ar
windows1251, // ba
windows1251, // be
@ -349,4 +349,4 @@ var localeMap = []htmlEncoding{
big5, // zh-hant
}
const locales = "und ar ba be bg cs el et fa he hr hu ja kk ko ku ky lt lv mk pl ru sah sk sl sr tg th tr tt uk vi zh-hans zh-hant"
const locales = "und_Latn ar ba be bg cs el et fa he hr hu ja kk ko ku ky lt lv mk pl ru sah sk sl sr tg th tr tt uk vi zh-hans zh-hant"

192
vendor/golang.org/x/text/encoding/ianaindex/gen.go generated vendored Normal file
View File

@ -0,0 +1,192 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"encoding/xml"
"fmt"
"io"
"log"
"sort"
"strconv"
"strings"
"golang.org/x/text/encoding/internal/identifier"
"golang.org/x/text/internal/gen"
)
// registry is the decoding target for the IANA character-sets XML document
// read in main. Only ID, Name, MIB, Alias and MIME are consulted by the code
// in this file; the remaining fields are decoded but not otherwise used here.
type registry struct {
XMLName xml.Name `xml:"registry"`
Updated string `xml:"updated"`
// Registry holds the nested <registry> elements. main looks only at the
// first one and requires its id attribute to be "character-sets-1".
Registry []struct {
ID string `xml:"id,attr"`
// Record is one <record> element of the registry.
Record []struct {
Name string `xml:"name"`
Xref []struct {
Type string `xml:"type,attr"`
Data string `xml:"data,attr"`
} `xml:"xref"`
Desc struct {
Data string `xml:",innerxml"`
} `xml:"description,"`
// MIB is the text of the <value> element; main parses it as a decimal
// integer and converts it to an identifier.MIB.
MIB string `xml:"value"`
Alias []string `xml:"alias"`
// MIME is the text of the <preferred_alias> element; main skips it when
// it is empty.
MIME string `xml:"preferred_alias"`
} `xml:"record"`
} `xml:"registry"`
}
// main generates tables.go for package ianaindex from the IANA character-sets
// registry. For every record it registers the MIB value under the record's
// name, adds each alias and the preferred MIME alias (if any), and finally
// writes the collected index through a gen.CodeWriter.
func main() {
	r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml")
	reg := &registry{}
	if err := xml.NewDecoder(r).Decode(&reg); err != nil && err != io.EOF {
		log.Fatalf("Error decoding charset registry: %v", err)
	}
	// Check for an empty document before indexing: formatting the ID of a
	// missing first registry would itself panic with an index out of range
	// instead of producing a useful message.
	if len(reg.Registry) == 0 {
		log.Fatal("No registry found in character-sets.xml")
	}
	if id := reg.Registry[0].ID; id != "character-sets-1" {
		log.Fatalf("Unexpected ID %s", id)
	}

	x := &indexInfo{}

	for _, rec := range reg.Registry[0].Record {
		mib := identifier.MIB(parseInt(rec.MIB))
		x.addEntry(mib, rec.Name)
		for _, a := range rec.Alias {
			a = strings.Split(a, " ")[0] // strip comments.
			x.addAlias(a, mib)
			// MIB name aliases are prefixed with a "cs" (character set) in the
			// registry to identify them as display names and to ensure that
			// the name starts with a lowercase letter in case it is used as
			// an identifier. We remove it to be left with a nice clean name.
			if strings.HasPrefix(a, "cs") {
				x.setName(2, a[2:])
			}
		}
		if rec.MIME != "" {
			// The preferred alias is both a lookup alias and the MIME name
			// (slot 1) of the entry that was just added.
			x.addAlias(rec.MIME, mib)
			x.setName(1, rec.MIME)
		}
	}

	w := gen.NewCodeWriter()

	fmt.Fprintln(w, `import "golang.org/x/text/encoding/internal/identifier"`)

	writeIndex(w, x)

	w.WriteGoFile("tables.go", "ianaindex")
}
// alias pairs one name, which writeIndex emits as a key of the generated
// ianaAliases map, with the MIB of the character set it refers to.
type alias struct {
name string
mib identifier.MIB
}
// indexInfo accumulates the data from which writeIndex produces the generated
// tables. It implements sort.Interface, ordering entries by MIB.
type indexInfo struct {
// compacted index from code to MIB
codeToMIB []identifier.MIB
// alias lists every name and alias emitted into the generated ianaAliases map.
alias []alias
// names holds, for the entry at the same position in codeToMIB, the IANA
// name, the MIME name and the MIB display name, in that order.
names [][3]string
}
// Len reports the number of entries in the index. It is part of the
// sort.Interface implementation used by writeIndex.
func (ii *indexInfo) Len() int {
	n := len(ii.codeToMIB)
	return n
}
// Less orders entries by ascending MIB value. It is part of the
// sort.Interface implementation used by writeIndex.
func (ii *indexInfo) Less(a, b int) bool {
	left, right := ii.codeToMIB[a], ii.codeToMIB[b]
	return left < right
}
// Swap exchanges entries a and b. The names slice is swapped together with
// codeToMIB so that both stay aligned by position while sorting.
func (ii *indexInfo) Swap(a, b int) {
	mibs := ii.codeToMIB
	mibs[a], mibs[b] = mibs[b], mibs[a]

	names := ii.names
	names[a], names[b] = names[b], names[a]
}
// setName overwrites slot i of the name triple belonging to the most recently
// added entry.
func (ii *indexInfo) setName(i int, name string) {
	last := len(ii.names) - 1
	ii.names[last][i] = name
}
// addEntry appends a new entry for mib. All three name slots start out as
// name, and name is also registered as an alias for mib.
func (ii *indexInfo) addEntry(mib identifier.MIB, name string) {
	var triple [3]string
	for i := range triple {
		triple[i] = name
	}
	ii.names = append(ii.names, triple)
	ii.addAlias(name, mib)
	ii.codeToMIB = append(ii.codeToMIB, mib)
}
// addAlias records name as an alias for mib and then does the same for the
// lower-cased form of name when that differs.
//
// A name is skipped if it already occurs in the trailing run of aliases that
// belong to the same mib. Duplicate aliases across different MIBs are kept on
// purpose: they end up as duplicate keys in the generated map literal, which
// the compiler rejects, so such conflicts cannot go unnoticed.
func (ii *indexInfo) addAlias(name string, mib identifier.MIB) {
	for {
		for i := len(ii.alias) - 1; i >= 0; i-- {
			if ii.alias[i].mib != mib {
				break
			}
			if ii.alias[i].name == name {
				return
			}
		}
		ii.alias = append(ii.alias, alias{name: name, mib: mib})

		lower := strings.ToLower(name)
		if lower == name {
			return
		}
		name = lower
	}
}
// maxMIMENameLen is the largest offset value that writeIndex will store as the
// first byte of a combined MIME+IANA name string. Its value is '0' - 1 (47).
const maxMIMENameLen = '0' - 1 // officially 40, but we leave some buffer.
// writeIndex sorts x by MIB and writes the generated declarations to w:
//
//   - the encNNN constants (one per MIB, numbered with iota) and numIANA,
//   - ianaToMIB, the sorted MIB values,
//   - ianaNames and mibNames, the per-entry name tables, and
//   - ianaAliases, mapping every alias to its encNNN constant.
func writeIndex(w *gen.CodeWriter, x *indexInfo) {
	sort.Stable(x)

	// Write constants.
	fmt.Fprintln(w, "const (")
	for i, m := range x.codeToMIB {
		if i == 0 {
			fmt.Fprintf(w, "enc%d = iota\n", m)
		} else {
			fmt.Fprintf(w, "enc%d\n", m)
		}
	}
	fmt.Fprintln(w, "numIANA")
	fmt.Fprintln(w, ")")

	w.WriteVar("ianaToMIB", x.codeToMIB)

	var ianaNames, mibNames []string
	for _, names := range x.names {
		n := names[0]
		if names[0] != names[1] {
			// MIME names are mostly identical to IANA names. We share the
			// tables by setting the first byte of the string to an index into
			// the string itself (< maxMIMENameLen) to the IANA name. The MIME
			// name immediately follows the index.
			off := len(names[1]) + 1
			if off > maxMIMENameLen {
				log.Fatalf("MIME name length (%d) > %d", off, maxMIMENameLen)
			}
			// Convert through rune explicitly: a direct string(int) conversion
			// is flagged by go vet. off is at most maxMIMENameLen, so the
			// result is always a single byte.
			n = string(rune(off)) + names[1] + names[0]
		}
		ianaNames = append(ianaNames, n)
		mibNames = append(mibNames, names[2])
	}

	w.WriteVar("ianaNames", ianaNames)
	w.WriteVar("mibNames", mibNames)

	w.WriteComment(`
TODO: Instead of using a map, we could use binary search strings doing
on-the fly lower-casing per character. This allows to always avoid
allocation and will be considerably more compact.`)
	fmt.Fprintln(w, "var ianaAliases = map[string]int{")
	for _, a := range x.alias {
		fmt.Fprintf(w, "%q: enc%d,\n", a.name, a.mib)
	}
	fmt.Fprintln(w, "}")
}
// parseInt converts the decimal string s to an int. Input that cannot be
// parsed is fatal: the error is logged and the program exits.
func parseInt(s string) int {
	v, err := strconv.ParseInt(s, 10, 64)
	if err == nil {
		return int(v)
	}
	log.Fatalf("Could not parse integer: %v", err)
	return 0 // not reached; log.Fatalf exits the process.
}

View File

@ -2,17 +2,28 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go
// Package ianaindex maps names to Encodings as specified by the IANA registry.
// This includes both the MIME and IANA names.
//
// Status: this package is an incomplete API sketch, and isn't usable yet.
//
// See http://www.iana.org/assignments/character-sets/character-sets.xhtml for
// more details.
package ianaindex
import (
"errors"
"sort"
"strings"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/internal/identifier"
"golang.org/x/text/encoding/japanese"
"golang.org/x/text/encoding/korean"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/encoding/traditionalchinese"
"golang.org/x/text/encoding/unicode"
)
// TODO: remove the "Status... incomplete" in the package doc comment.
@ -25,28 +36,68 @@ import (
// support MIME otherwise.
var (
// MIME is an index to map MIME names. It does not support aliases.
MIME *Index
// MIME is an index to map MIME names.
MIME *Index = mime
// IANA is an index that supports all names and aliases using IANA names as
// the canonical identifier.
IANA *Index
IANA *Index = iana
// MIB is an index that associates the MIB display name with an Encoding.
MIB *Index = mib
mime = &Index{mimeName, ianaToMIB, ianaAliases, encodings[:]}
iana = &Index{ianaName, ianaToMIB, ianaAliases, encodings[:]}
mib = &Index{mibName, ianaToMIB, ianaAliases, encodings[:]}
)
// Index maps names registered by IANA to Encodings.
// Currently different Indexes only differ in the names they return for
// encodings. In the future they may also differ in supported aliases.
type Index struct {
// names returns the canonical name for position i of toMIB.
names func(i int) string
toMIB []identifier.MIB // Sorted slice of supported MIBs
// alias maps a registered name or alias to an index into enc.
alias map[string]int
// enc holds the Encoding for each index stored in alias.
enc []encoding.Encoding
}
// Get returns an Encoding for IANA-registered names. Matching is
// Errors returned by the methods of Index.
var (
// errInvalidName is returned by Encoding when the name matches no alias.
errInvalidName = errors.New("ianaindex: invalid encoding name")
// errUnknown is returned by Name when the Encoding does not expose a
// non-zero MIB identifier.
errUnknown = errors.New("ianaindex: unknown Encoding")
// errUnsupported is returned by Name when the MIB is not part of the index.
errUnsupported = errors.New("ianaindex: unsupported Encoding")
)
// Encoding returns an Encoding for IANA-registered names. Matching is
// case-insensitive.
func (x *Index) Get(name string) (encoding.Encoding, error) {
panic("TODO: implement")
func (x *Index) Encoding(name string) (encoding.Encoding, error) {
name = strings.TrimSpace(name)
// First try without lowercasing (possibly creating an allocation).
i, ok := x.alias[name]
if !ok {
i, ok = x.alias[strings.ToLower(name)]
if !ok {
return nil, errInvalidName
}
}
return x.enc[i], nil
}
// Name reports the canonical name of the given Encoding. It will return an
// error if e is not associated with a known encoding scheme.
func (x *Index) Name(e encoding.Encoding) (string, error) {
panic("TODO: implement")
id, ok := e.(identifier.Interface)
if !ok {
return "", errUnknown
}
mib, _ := id.ID()
if mib == 0 {
return "", errUnknown
}
v := findMIB(x.toMIB, mib)
if v == -1 {
return "", errUnsupported
}
return x.names(v), nil
}
// TODO: the coverage of this index is rather spotty. Allowing users to set
@ -65,3 +116,94 @@ func (x *Index) Name(e encoding.Encoding) (string, error) {
// func (x *Index) Set(name string, e encoding.Encoding) error {
// panic("TODO: implement")
// }
// findMIB returns the position of mib in the sorted slice x, or -1 if mib is
// not present.
func findMIB(x []identifier.MIB, mib identifier.MIB) int {
	n := len(x)
	pos := sort.Search(n, func(k int) bool { return x[k] >= mib })
	if pos >= n || x[pos] != mib {
		return -1
	}
	return pos
}
// maxMIMENameLen is the threshold used by mimeName and ianaName: an ianaNames
// entry whose first byte is at or below it is treated as starting with an
// offset byte rather than with a name character. Its value is '0' - 1 (47).
const maxMIMENameLen = '0' - 1 // officially 40, but we leave some buffer.
// mimeName returns the MIME name stored at position x of ianaNames.
//
// When the entry begins with an offset byte (see gen.go for a description of
// the encoding), the MIME name is the text between that byte and the offset.
// Otherwise the whole entry is returned.
func mimeName(x int) string {
	entry := ianaNames[x]
	off := entry[0]
	if off > maxMIMENameLen {
		return entry
	}
	return entry[1:off]
}
// ianaName returns the IANA name stored at position x of ianaNames.
//
// When the entry begins with an offset byte (see gen.go for a description of
// the encoding), the IANA name is everything from that offset onwards.
// Otherwise the whole entry is returned.
func ianaName(x int) string {
	entry := ianaNames[x]
	off := entry[0]
	if off > maxMIMENameLen {
		return entry
	}
	return entry[off:]
}
// mibName returns the MIB display name stored at position x of mibNames.
func mibName(x int) string {
	name := mibNames[x]
	return name
}
// encodings holds the Encoding implementation for each index constant that
// has one. It is a fixed-size array of numIANA elements; every index that is
// not listed below keeps the nil zero value.
var encodings = [numIANA]encoding.Encoding{
	// Unicode encodings.
	enc106:  unicode.UTF8,
	enc1015: unicode.UTF16(unicode.BigEndian, unicode.UseBOM),
	enc1013: unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
	enc1014: unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM),
	// Single-byte character maps.
	enc2028: charmap.CodePage037,
	enc2011: charmap.CodePage437,
	enc2009: charmap.CodePage850,
	enc2010: charmap.CodePage852,
	enc2046: charmap.CodePage855,
	enc2089: charmap.CodePage858,
	enc2048: charmap.CodePage860,
	enc2013: charmap.CodePage862,
	enc2050: charmap.CodePage863,
	enc2052: charmap.CodePage865,
	enc2086: charmap.CodePage866,
	enc2102: charmap.CodePage1047,
	enc2091: charmap.CodePage1140,
	enc4:    charmap.ISO8859_1,
	enc5:    charmap.ISO8859_2,
	enc6:    charmap.ISO8859_3,
	enc7:    charmap.ISO8859_4,
	enc8:    charmap.ISO8859_5,
	enc9:    charmap.ISO8859_6,
	enc81:   charmap.ISO8859_6E,
	enc82:   charmap.ISO8859_6I,
	enc10:   charmap.ISO8859_7,
	enc11:   charmap.ISO8859_8,
	enc84:   charmap.ISO8859_8E,
	enc85:   charmap.ISO8859_8I,
	enc12:   charmap.ISO8859_9,
	enc13:   charmap.ISO8859_10,
	enc109:  charmap.ISO8859_13,
	enc110:  charmap.ISO8859_14,
	enc111:  charmap.ISO8859_15,
	enc112:  charmap.ISO8859_16,
	enc2084: charmap.KOI8R,
	enc2088: charmap.KOI8U,
	enc2027: charmap.Macintosh,
	enc2109: charmap.Windows874,
	enc2250: charmap.Windows1250,
	enc2251: charmap.Windows1251,
	enc2252: charmap.Windows1252,
	enc2253: charmap.Windows1253,
	enc2254: charmap.Windows1254,
	enc2255: charmap.Windows1255,
	enc2256: charmap.Windows1256,
	enc2257: charmap.Windows1257,
	enc2258: charmap.Windows1258,
	// Multi-byte East Asian encodings.
	enc18:   japanese.EUCJP,
	enc39:   japanese.ISO2022JP,
	enc17:   japanese.ShiftJIS,
	enc38:   korean.EUCKR,
	enc114:  simplifiedchinese.GB18030,
	enc113:  simplifiedchinese.GBK,
	enc2085: simplifiedchinese.HZGB2312,
	enc2026: traditionalchinese.Big5,
}

2348
vendor/golang.org/x/text/encoding/ianaindex/tables.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,180 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package enctest
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"strings"
"testing"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/internal/identifier"
"golang.org/x/text/transform"
)
// Transcoder is the method set the helpers in this package need from a
// converter. The values returned by an Encoding's NewEncoder and NewDecoder
// are both used as a Transcoder here, so one code path can exercise either
// direction.
type Transcoder interface {
	transform.Transformer
	// Bytes converts a whole byte slice in one call.
	Bytes([]byte) ([]byte, error)
	// String converts a whole string in one call.
	String(string) (string, error)
}
// TestEncoding checks that e converts between encoded and utf8 in both
// directions, running one subtest for "Decode" and one for "Encode".
//
// encoded is the text in e's encoding and utf8 is the same text in UTF-8.
// prefix and suffix are bytes that appear only on the encoded side: when
// decoding they are added around the input and are expected to produce no
// output, and when encoding they are expected around the output without
// being present in the input.
//
// Each subtest first performs a single Transform call into a destination
// buffer of exactly the expected size, then converts the source text repeated
// 0 to 4567 times through the transcoder's String method.
func TestEncoding(t *testing.T, e encoding.Encoding, encoded, utf8, prefix, suffix string) {
	for _, direction := range []string{"Decode", "Encode"} {
		t.Run(fmt.Sprintf("%v/%s", e, direction), func(t *testing.T) {
			var coder Transcoder
			var want, src, wPrefix, sPrefix, wSuffix, sSuffix string
			if direction == "Decode" {
				coder, want, src = e.NewDecoder(), utf8, encoded
				wPrefix, sPrefix, wSuffix, sSuffix = "", prefix, "", suffix
			} else {
				coder, want, src = e.NewEncoder(), encoded, utf8
				wPrefix, sPrefix, wSuffix, sSuffix = prefix, "", suffix, ""
			}

			// The destination is sized to fit the expected output exactly, so
			// both an overlong and a truncated result are detected.
			dst := make([]byte, len(wPrefix)+len(want)+len(wSuffix))
			nDst, nSrc, err := coder.Transform(dst, []byte(sPrefix+src+sSuffix), true)
			if err != nil {
				t.Fatal(err)
			}
			if nDst != len(wPrefix)+len(want)+len(wSuffix) {
				t.Fatalf("nDst got %d, want %d",
					nDst, len(wPrefix)+len(want)+len(wSuffix))
			}
			if nSrc != len(sPrefix)+len(src)+len(sSuffix) {
				t.Fatalf("nSrc got %d, want %d",
					nSrc, len(sPrefix)+len(src)+len(sSuffix))
			}
			if got := string(dst); got != wPrefix+want+wSuffix {
				t.Fatalf("\ngot  %q\nwant %q", got, wPrefix+want+wSuffix)
			}

			for _, n := range []int{0, 1, 2, 10, 123, 4567} {
				input := sPrefix + strings.Repeat(src, n) + sSuffix
				got1, err := coder.String(input)
				if err != nil {
					// Name the method that was actually called so that a
					// failure points at the right code path.
					t.Fatalf("String: n=%d: %v", n, err)
				}
				if len(got1) == 0 && len(input) == 0 {
					// If the input is empty then the output can be empty,
					// regardless of whatever wPrefix is.
					continue
				}
				want1 := wPrefix + strings.Repeat(want, n) + wSuffix
				if got1 != want1 {
					t.Fatalf("String: n=%d\ngot  %q\nwant %q",
						n, trim(got1), trim(want1))
				}
			}
		})
	}
}
// TestFile converts the golden test file registered for e in each direction
// ("Decode" and "Encode") and reports an error when the converted bytes
// differ from the matching golden file. A failure to load the files or to
// run the conversion aborts the subtest.
func TestFile(t *testing.T, e encoding.Encoding) {
	directions := [...]string{"Decode", "Encode"}
	for i := range directions {
		direction := directions[i]
		t.Run(fmt.Sprintf("%s/%s", e, direction), func(t *testing.T) {
			want, input, coder, loadErr := load(direction, e)
			if loadErr != nil {
				t.Fatalf("load: %v", loadErr)
			}
			got, transformErr := coder.Bytes(input)
			if transformErr != nil {
				t.Fatalf("transform: %v", transformErr)
			}
			if bytes.Equal(got, want) {
				return
			}
			t.Error("transformed bytes did not match golden file")
		})
	}
}
// Benchmark measures streaming conversion of the golden test file registered
// for enc, with one sub-benchmark per direction ("Decode" and "Encode").
// Throughput is reported relative to the size of the source file.
func Benchmark(b *testing.B, enc encoding.Encoding) {
	for _, direction := range []string{"Decode", "Encode"} {
		b.Run(fmt.Sprintf("%s/%s", enc, direction), func(b *testing.B) {
			_, src, transformer, err := load(direction, enc)
			if err != nil {
				b.Fatal(err)
			}
			b.SetBytes(int64(len(src)))
			// Loading the files must not count towards the measurement.
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				r := transform.NewReader(bytes.NewReader(src), transformer)
				// A failed copy stops early and would be measured as if the
				// whole input had been converted, so fail instead of
				// reporting a misleading number.
				if _, err := io.Copy(ioutil.Discard, r); err != nil {
					b.Fatalf("copy: %v", err)
				}
			}
		})
	}
}
// testdataFiles are files in testdata/*.txt.
//
// Each entry ties an encoding, identified by its MIB, to a golden file pair:
// load reads "<basename>-<ext>.txt" as the encoded text and
// "<basename>-utf-8.txt" as its UTF-8 counterpart. load requires exactly one
// entry per MIB.
var testdataFiles = []struct {
	// mib identifies the encoding the entry belongs to.
	mib identifier.MIB
	// basename is the shared file name stem; ext names the encoding in the
	// file name of the encoded variant.
	basename, ext string
}{
	{identifier.Windows1252, "candide", "windows-1252"},
	{identifier.EUCPkdFmtJapanese, "rashomon", "euc-jp"},
	{identifier.ISO2022JP, "rashomon", "iso-2022-jp"},
	{identifier.ShiftJIS, "rashomon", "shift-jis"},
	{identifier.EUCKR, "unsu-joh-eun-nal", "euc-kr"},
	{identifier.GBK, "sunzi-bingfa-simplified", "gbk"},
	{identifier.HZGB2312, "sunzi-bingfa-gb-levels-1-and-2", "hz-gb2312"},
	{identifier.Big5, "sunzi-bingfa-traditional", "big5"},
	{identifier.UTF16LE, "candide", "utf-16le"},
	{identifier.UTF8, "candide", "utf-8"},
	{identifier.UTF32BE, "candide", "utf-32be"},
	// GB18030 is a superset of GBK and is nominally a Simplified Chinese
	// encoding, but it can also represent the entire Basic Multilingual
	// Plane, including codepoints like 'â' that aren't encodable by GBK.
	// GB18030 on Simplified Chinese should perform similarly to GBK on
	// Simplified Chinese. GB18030 on "candide" is more interesting.
	{identifier.GB18030, "candide", "gb18030"},
}
// load reads the golden file pair registered in testdataFiles for enc.
//
// It returns, in order, the expected output, the input, and the Transcoder
// that should turn the input into the expected output. For direction
// "Decode" the input is the encoded file, the expected output is the UTF-8
// file and the Transcoder is a decoder. For any other direction the roles of
// the files are swapped and the Transcoder is an encoder wrapped with
// encoding.ReplaceUnsupported.
//
// An error is returned when enc exposes no identifier, when testdataFiles
// does not hold exactly one entry for its MIB, or when a file cannot be read.
func load(direction string, enc encoding.Encoding) ([]byte, []byte, Transcoder, error) {
	// Use the checked form of the assertion: an Encoding without an
	// identifier must be reported as an error, not crash the test binary.
	id, ok := enc.(identifier.Interface)
	if !ok {
		return nil, nil, nil, fmt.Errorf("no MIB identifier for %s", enc)
	}
	// The MIB is the same for every table entry, so resolve it once.
	mib, _ := id.ID()

	basename, ext, count := "", "", 0
	for _, tf := range testdataFiles {
		if tf.mib == mib {
			basename, ext = tf.basename, tf.ext
			count++
		}
	}
	switch {
	case count == 0:
		return nil, nil, nil, fmt.Errorf("no testdataFiles for %s", enc)
	case count > 1:
		return nil, nil, nil, fmt.Errorf("too many testdataFiles for %s", enc)
	}

	dstFile := fmt.Sprintf("../testdata/%s-%s.txt", basename, ext)
	srcFile := fmt.Sprintf("../testdata/%s-utf-8.txt", basename)
	var coder Transcoder = encoding.ReplaceUnsupported(enc.NewEncoder())
	if direction == "Decode" {
		dstFile, srcFile = srcFile, dstFile
		coder = enc.NewDecoder()
	}

	// Each file is tried at its nominal path first and then one directory
	// further up, so the helper works from packages at either depth.
	dst, err := ioutil.ReadFile(dstFile)
	if err != nil {
		if dst, err = ioutil.ReadFile("../" + dstFile); err != nil {
			return nil, nil, nil, err
		}
	}
	src, err := ioutil.ReadFile(srcFile)
	if err != nil {
		if src, err = ioutil.ReadFile("../" + srcFile); err != nil {
			return nil, nil, nil, err
		}
	}
	return dst, src, coder, nil
}
// trim shortens s for display in failure messages. Strings of fewer than 120
// bytes are returned unchanged; longer ones are reduced to their first and
// last 50 bytes joined by "...".
func trim(s string) string {
	const (
		limit = 120
		keep  = 50
	)
	if len(s) >= limit {
		return s[:keep] + "..." + s[len(s)-keep:]
	}
	return s
}

View File

@ -1,4 +1,4 @@
// This file was generated by go generate; DO NOT EDIT
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package identifier

View File

@ -5,7 +5,6 @@
package japanese
import (
"errors"
"unicode/utf8"
"golang.org/x/text/encoding"
@ -23,10 +22,9 @@ var eucJP = internal.Encoding{
identifier.EUCPkdFmtJapanese,
}
var errInvalidEUCJP = errors.New("japanese: invalid EUC-JP encoding")
type eucJPDecoder struct{ transform.NopResetter }
// See https://encoding.spec.whatwg.org/#euc-jp-decoder.
func (eucJPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
r, size := rune(0), 0
loop:
@ -37,60 +35,79 @@ loop:
case c0 == 0x8e:
if nSrc+1 >= len(src) {
err = transform.ErrShortSrc
break loop
if !atEOF {
err = transform.ErrShortSrc
break loop
}
r, size = utf8.RuneError, 1
break
}
c1 := src[nSrc+1]
if c1 < 0xa1 || 0xdf < c1 {
err = errInvalidEUCJP
break loop
switch {
case c1 < 0xa1:
r, size = utf8.RuneError, 1
case c1 > 0xdf:
r, size = utf8.RuneError, 2
if c1 == 0xff {
size = 1
}
default:
r, size = rune(c1)+(0xff61-0xa1), 2
}
r, size = rune(c1)+(0xff61-0xa1), 2
case c0 == 0x8f:
if nSrc+2 >= len(src) {
err = transform.ErrShortSrc
break loop
if !atEOF {
err = transform.ErrShortSrc
break loop
}
r, size = utf8.RuneError, 1
if p := nSrc + 1; p < len(src) && 0xa1 <= src[p] && src[p] < 0xfe {
size = 2
}
break
}
c1 := src[nSrc+1]
if c1 < 0xa1 || 0xfe < c1 {
err = errInvalidEUCJP
break loop
r, size = utf8.RuneError, 1
break
}
c2 := src[nSrc+2]
if c2 < 0xa1 || 0xfe < c2 {
err = errInvalidEUCJP
break loop
r, size = utf8.RuneError, 2
break
}
r, size = '\ufffd', 3
r, size = utf8.RuneError, 3
if i := int(c1-0xa1)*94 + int(c2-0xa1); i < len(jis0212Decode) {
r = rune(jis0212Decode[i])
if r == 0 {
r = '\ufffd'
r = utf8.RuneError
}
}
case 0xa1 <= c0 && c0 <= 0xfe:
if nSrc+1 >= len(src) {
err = transform.ErrShortSrc
break loop
if !atEOF {
err = transform.ErrShortSrc
break loop
}
r, size = utf8.RuneError, 1
break
}
c1 := src[nSrc+1]
if c1 < 0xa1 || 0xfe < c1 {
err = errInvalidEUCJP
break loop
r, size = utf8.RuneError, 1
break
}
r, size = '\ufffd', 2
r, size = utf8.RuneError, 2
if i := int(c0-0xa1)*94 + int(c1-0xa1); i < len(jis0208Decode) {
r = rune(jis0208Decode[i])
if r == 0 {
r = '\ufffd'
r = utf8.RuneError
}
}
default:
err = errInvalidEUCJP
break loop
r, size = utf8.RuneError, 1
}
if nDst+utf8.RuneLen(r) > len(dst) {
@ -99,9 +116,6 @@ loop:
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
if atEOF && err == transform.ErrShortSrc {
err = errInvalidEUCJP
}
return nDst, nSrc, err
}

View File

@ -5,7 +5,6 @@
package japanese
import (
"errors"
"unicode/utf8"
"golang.org/x/text/encoding"
@ -31,8 +30,6 @@ func iso2022JPNewEncoder() transform.Transformer {
return new(iso2022JPEncoder)
}
var errInvalidISO2022JP = errors.New("japanese: invalid ISO-2022-JP encoding")
const (
asciiState = iota
katakanaState
@ -50,45 +47,51 @@ func (d *iso2022JPDecoder) Reset() {
func (d *iso2022JPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
r, size := rune(0), 0
loop:
for ; nSrc < len(src); nSrc += size {
c0 := src[nSrc]
if c0 >= utf8.RuneSelf {
err = errInvalidISO2022JP
break loop
r, size = '\ufffd', 1
goto write
}
if c0 == asciiEsc {
if nSrc+2 >= len(src) {
err = transform.ErrShortSrc
break loop
if !atEOF {
return nDst, nSrc, transform.ErrShortSrc
}
// TODO: is it correct to only skip 1??
r, size = '\ufffd', 1
goto write
}
size = 3
c1 := src[nSrc+1]
c2 := src[nSrc+2]
switch {
case c1 == '$' && (c2 == '@' || c2 == 'B'):
case c1 == '$' && (c2 == '@' || c2 == 'B'): // 0x24 {0x40, 0x42}
*d = jis0208State
continue
case c1 == '$' && c2 == '(':
case c1 == '$' && c2 == '(': // 0x24 0x28
if nSrc+3 >= len(src) {
err = transform.ErrShortSrc
break loop
if !atEOF {
return nDst, nSrc, transform.ErrShortSrc
}
r, size = '\ufffd', 1
goto write
}
size = 4
if src[nSrc]+3 == 'D' {
if src[nSrc+3] == 'D' {
*d = jis0212State
continue
}
case c1 == '(' && (c2 == 'B' || c2 == 'J'):
case c1 == '(' && (c2 == 'B' || c2 == 'J'): // 0x28 {0x42, 0x4A}
*d = asciiState
continue
case c1 == '(' && c2 == 'I':
case c1 == '(' && c2 == 'I': // 0x28 0x49
*d = katakanaState
continue
}
err = errInvalidISO2022JP
break loop
r, size = '\ufffd', 1
goto write
}
switch *d {
@ -97,8 +100,8 @@ loop:
case katakanaState:
if c0 < 0x21 || 0x60 <= c0 {
err = errInvalidISO2022JP
break loop
r, size = '\ufffd', 1
goto write
}
r, size = rune(c0)+(0xff61-0x21), 1
@ -106,11 +109,14 @@ loop:
if c0 == 0x0a {
*d = asciiState
r, size = rune(c0), 1
break
goto write
}
if nSrc+1 >= len(src) {
err = transform.ErrShortSrc
break loop
if !atEOF {
return nDst, nSrc, transform.ErrShortSrc
}
r, size = '\ufffd', 1
goto write
}
size = 2
c1 := src[nSrc+1]
@ -121,22 +127,19 @@ loop:
r = rune(jis0212Decode[i])
} else {
r = '\ufffd'
break
goto write
}
if r == 0 {
r = '\ufffd'
}
}
write:
if nDst+utf8.RuneLen(r) > len(dst) {
err = transform.ErrShortDst
break loop
return nDst, nSrc, transform.ErrShortDst
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
if atEOF && err == transform.ErrShortSrc {
err = errInvalidISO2022JP
}
return nDst, nSrc, err
}

View File

@ -5,7 +5,6 @@
package japanese
import (
"errors"
"unicode/utf8"
"golang.org/x/text/encoding"
@ -24,8 +23,6 @@ var shiftJIS = internal.Encoding{
identifier.ShiftJIS,
}
var errInvalidShiftJIS = errors.New("japanese: invalid Shift JIS encoding")
type shiftJISDecoder struct{ transform.NopResetter }
func (shiftJISDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
@ -48,28 +45,32 @@ loop:
c0 = 2*c0 - 0x21
if nSrc+1 >= len(src) {
err = transform.ErrShortSrc
break loop
if !atEOF {
err = transform.ErrShortSrc
break loop
}
r, size = '\ufffd', 1
goto write
}
c1 := src[nSrc+1]
switch {
case c1 < 0x40:
err = errInvalidShiftJIS
break loop
r, size = '\ufffd', 1 // c1 is ASCII so output on next round
goto write
case c1 < 0x7f:
c0--
c1 -= 0x40
case c1 == 0x7f:
err = errInvalidShiftJIS
break loop
r, size = '\ufffd', 1 // c1 is ASCII so output on next round
goto write
case c1 < 0x9f:
c0--
c1 -= 0x41
case c1 < 0xfd:
c1 -= 0x9f
default:
err = errInvalidShiftJIS
break loop
r, size = '\ufffd', 2
goto write
}
r, size = '\ufffd', 2
if i := int(c0)*94 + int(c1); i < len(jis0208Decode) {
@ -79,20 +80,19 @@ loop:
}
}
default:
err = errInvalidShiftJIS
break loop
}
case c0 == 0x80:
r, size = 0x80, 1
default:
r, size = '\ufffd', 1
}
write:
if nDst+utf8.RuneLen(r) > len(dst) {
err = transform.ErrShortDst
break loop
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
if atEOF && err == transform.ErrShortSrc {
err = errInvalidShiftJIS
}
return nDst, nSrc, err
}

View File

@ -5,7 +5,6 @@
package korean
import (
"errors"
"unicode/utf8"
"golang.org/x/text/encoding"
@ -26,8 +25,6 @@ var eucKR = internal.Encoding{
identifier.EUCKR,
}
var errInvalidEUCKR = errors.New("korean: invalid EUC-KR encoding")
type eucKRDecoder struct{ transform.NopResetter }
func (eucKRDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
@ -40,10 +37,15 @@ loop:
case 0x81 <= c0 && c0 < 0xff:
if nSrc+1 >= len(src) {
err = transform.ErrShortSrc
break loop
if !atEOF {
err = transform.ErrShortSrc
break loop
}
r, size = utf8.RuneError, 1
break
}
c1 := src[nSrc+1]
size = 2
if c0 < 0xc7 {
r = 178 * rune(c0-0x81)
switch {
@ -54,39 +56,36 @@ loop:
case 0x81 <= c1 && c1 < 0xff:
r += rune(c1) - (0x81 - 2*26)
default:
err = errInvalidEUCKR
break loop
goto decError
}
} else if 0xa1 <= c1 && c1 < 0xff {
r = 178*(0xc7-0x81) + rune(c0-0xc7)*94 + rune(c1-0xa1)
} else {
err = errInvalidEUCKR
break loop
goto decError
}
if int(r) < len(decode) {
r = rune(decode[r])
if r == 0 {
r = '\ufffd'
if r != 0 {
break
}
} else {
r = '\ufffd'
}
size = 2
decError:
r = utf8.RuneError
if c1 < utf8.RuneSelf {
size = 1
}
default:
err = errInvalidEUCKR
break loop
r, size = utf8.RuneError, 1
break
}
if nDst+utf8.RuneLen(r) > len(dst) {
err = transform.ErrShortDst
break loop
break
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
if atEOF && err == transform.ErrShortSrc {
err = errInvalidEUCKR
}
return nDst, nSrc, err
}

View File

@ -5,7 +5,6 @@
package simplifiedchinese
import (
"errors"
"unicode/utf8"
"golang.org/x/text/encoding"
@ -40,11 +39,6 @@ var gbk18030 = internal.Encoding{
identifier.GB18030,
}
var (
errInvalidGB18030 = errors.New("simplifiedchinese: invalid GB18030 encoding")
errInvalidGBK = errors.New("simplifiedchinese: invalid GBK encoding")
)
type gbkDecoder struct {
transform.NopResetter
gb18030 bool
@ -66,8 +60,12 @@ loop:
case c0 < 0xff:
if nSrc+1 >= len(src) {
err = transform.ErrShortSrc
break loop
if !atEOF {
err = transform.ErrShortSrc
break loop
}
r, size = utf8.RuneError, 1
goto write
}
c1 := src[nSrc+1]
switch {
@ -77,18 +75,24 @@ loop:
c1 -= 0x41
case d.gb18030 && 0x30 <= c1 && c1 < 0x40:
if nSrc+3 >= len(src) {
err = transform.ErrShortSrc
break loop
if !atEOF {
err = transform.ErrShortSrc
break loop
}
// The second byte here is always ASCII, so we can set size
// to 1 in all cases.
r, size = utf8.RuneError, 1
goto write
}
c2 := src[nSrc+2]
if c2 < 0x81 || 0xff <= c2 {
err = errInvalidGB18030
break loop
r, size = utf8.RuneError, 1
goto write
}
c3 := src[nSrc+3]
if c3 < 0x30 || 0x3a <= c3 {
err = errInvalidGB18030
break loop
r, size = utf8.RuneError, 1
goto write
}
size = 4
r = ((rune(c0-0x81)*10+rune(c1-0x30))*126+rune(c2-0x81))*10 + rune(c3-0x30)
@ -109,17 +113,13 @@ loop:
r -= 189000
if 0 <= r && r < 0x100000 {
r += 0x10000
goto write
}
err = errInvalidGB18030
break loop
default:
if d.gb18030 {
err = errInvalidGB18030
} else {
err = errInvalidGBK
r, size = utf8.RuneError, 1
}
break loop
goto write
default:
r, size = utf8.RuneError, 1
goto write
}
r, size = '\ufffd', 2
if i := int(c0-0x81)*190 + int(c1); i < len(decode) {
@ -130,12 +130,7 @@ loop:
}
default:
if d.gb18030 {
err = errInvalidGB18030
} else {
err = errInvalidGBK
}
break loop
r, size = utf8.RuneError, 1
}
write:
@ -145,13 +140,6 @@ loop:
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
if atEOF && err == transform.ErrShortSrc {
if d.gb18030 {
err = errInvalidGB18030
} else {
err = errInvalidGBK
}
}
return nDst, nSrc, err
}

View File

@ -5,7 +5,6 @@
package simplifiedchinese
import (
"errors"
"unicode/utf8"
"golang.org/x/text/encoding"
@ -31,8 +30,6 @@ func hzGB2312NewEncoder() transform.Transformer {
return new(hzGB2312Encoder)
}
var errInvalidHZGB2312 = errors.New("simplifiedchinese: invalid HZ-GB2312 encoding")
const (
asciiState = iota
gbState
@ -50,14 +47,18 @@ loop:
for ; nSrc < len(src); nSrc += size {
c0 := src[nSrc]
if c0 >= utf8.RuneSelf {
err = errInvalidHZGB2312
break loop
r, size = utf8.RuneError, 1
goto write
}
if c0 == '~' {
if nSrc+1 >= len(src) {
err = transform.ErrShortSrc
break loop
if !atEOF {
err = transform.ErrShortSrc
break loop
}
r = utf8.RuneError
goto write
}
size = 2
switch src[nSrc+1] {
@ -78,8 +79,8 @@ loop:
case '\n':
continue
default:
err = errInvalidHZGB2312
break loop
r = utf8.RuneError
goto write
}
}
@ -87,33 +88,37 @@ loop:
r, size = rune(c0), 1
} else {
if nSrc+1 >= len(src) {
err = transform.ErrShortSrc
break loop
if !atEOF {
err = transform.ErrShortSrc
break loop
}
r, size = utf8.RuneError, 1
goto write
}
size = 2
c1 := src[nSrc+1]
if c0 < 0x21 || 0x7e <= c0 || c1 < 0x21 || 0x7f <= c1 {
err = errInvalidHZGB2312
break loop
}
r, size = '\ufffd', 2
if i := int(c0-0x01)*190 + int(c1+0x3f); i < len(decode) {
// error
} else if i := int(c0-0x01)*190 + int(c1+0x3f); i < len(decode) {
r = rune(decode[i])
if r == 0 {
r = '\ufffd'
if r != 0 {
goto write
}
}
if c1 > utf8.RuneSelf {
// Be consistent and always treat non-ASCII as a single error.
size = 1
}
r = utf8.RuneError
}
write:
if nDst+utf8.RuneLen(r) > len(dst) {
err = transform.ErrShortDst
break loop
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
if atEOF && err == transform.ErrShortSrc {
err = errInvalidHZGB2312
}
return nDst, nSrc, err
}

View File

@ -5,7 +5,6 @@
package traditionalchinese
import (
"errors"
"unicode/utf8"
"golang.org/x/text/encoding"
@ -26,8 +25,6 @@ var big5 = internal.Encoding{
identifier.Big5,
}
var errInvalidBig5 = errors.New("traditionalchinese: invalid Big5 encoding")
type big5Decoder struct{ transform.NopResetter }
func (big5Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
@ -40,8 +37,12 @@ loop:
case 0x81 <= c0 && c0 < 0xff:
if nSrc+1 >= len(src) {
err = transform.ErrShortSrc
break loop
if !atEOF {
err = transform.ErrShortSrc
break loop
}
r, size = utf8.RuneError, 1
goto write
}
c1 := src[nSrc+1]
switch {
@ -49,9 +50,12 @@ loop:
c1 -= 0x40
case 0xa1 <= c1 && c1 < 0xff:
c1 -= 0x62
case c1 < 0x40:
r, size = utf8.RuneError, 1
goto write
default:
err = errInvalidBig5
break loop
r, size = utf8.RuneError, 2
goto write
}
r, size = '\ufffd', 2
if i := int(c0-0x81)*157 + int(c1); i < len(decode) {
@ -80,10 +84,10 @@ loop:
}
default:
err = errInvalidBig5
break loop
r, size = utf8.RuneError, 1
}
write:
if nDst+utf8.RuneLen(r) > len(dst) {
err = transform.ErrShortDst
break loop
@ -99,9 +103,6 @@ loop:
nDst += copy(dst[nDst:], s)
continue loop
}
if atEOF && err == transform.ErrShortSrc {
err = errInvalidBig5
}
return nDst, nSrc, err
}