mirror of
https://github.com/cwinfo/matterbridge.git
synced 2025-07-03 16:37:44 +00:00
Use mod vendor for vendored directory (backwards compatible)
This commit is contained in:
27
vendor/golang.org/x/text/encoding/LICENSE
generated
vendored
27
vendor/golang.org/x/text/encoding/LICENSE
generated
vendored
@ -1,27 +0,0 @@
|
||||
Copyright (c) 2009 The Go Authors. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
249
vendor/golang.org/x/text/encoding/charmap/charmap.go
generated
vendored
249
vendor/golang.org/x/text/encoding/charmap/charmap.go
generated
vendored
@ -1,249 +0,0 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run maketables.go
|
||||
|
||||
// Package charmap provides simple character encodings such as IBM Code Page 437
|
||||
// and Windows 1252.
|
||||
package charmap // import "golang.org/x/text/encoding/charmap"
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// These encodings vary only in the way clients should interpret them. Their
|
||||
// coded character set is identical and a single implementation can be shared.
|
||||
var (
|
||||
// ISO8859_6E is the ISO 8859-6E encoding.
|
||||
ISO8859_6E encoding.Encoding = &iso8859_6E
|
||||
|
||||
// ISO8859_6I is the ISO 8859-6I encoding.
|
||||
ISO8859_6I encoding.Encoding = &iso8859_6I
|
||||
|
||||
// ISO8859_8E is the ISO 8859-8E encoding.
|
||||
ISO8859_8E encoding.Encoding = &iso8859_8E
|
||||
|
||||
// ISO8859_8I is the ISO 8859-8I encoding.
|
||||
ISO8859_8I encoding.Encoding = &iso8859_8I
|
||||
|
||||
iso8859_6E = internal.Encoding{
|
||||
Encoding: ISO8859_6,
|
||||
Name: "ISO-8859-6E",
|
||||
MIB: identifier.ISO88596E,
|
||||
}
|
||||
|
||||
iso8859_6I = internal.Encoding{
|
||||
Encoding: ISO8859_6,
|
||||
Name: "ISO-8859-6I",
|
||||
MIB: identifier.ISO88596I,
|
||||
}
|
||||
|
||||
iso8859_8E = internal.Encoding{
|
||||
Encoding: ISO8859_8,
|
||||
Name: "ISO-8859-8E",
|
||||
MIB: identifier.ISO88598E,
|
||||
}
|
||||
|
||||
iso8859_8I = internal.Encoding{
|
||||
Encoding: ISO8859_8,
|
||||
Name: "ISO-8859-8I",
|
||||
MIB: identifier.ISO88598I,
|
||||
}
|
||||
)
|
||||
|
||||
// All is a list of all defined encodings in this package.
|
||||
var All []encoding.Encoding = listAll
|
||||
|
||||
// TODO: implement these encodings, in order of importance.
|
||||
// ASCII, ISO8859_1: Rather common. Close to Windows 1252.
|
||||
// ISO8859_9: Close to Windows 1254.
|
||||
|
||||
// utf8Enc holds a rune's UTF-8 encoding in data[:len].
|
||||
type utf8Enc struct {
|
||||
len uint8
|
||||
data [3]byte
|
||||
}
|
||||
|
||||
// Charmap is an 8-bit character set encoding.
|
||||
type Charmap struct {
|
||||
// name is the encoding's name.
|
||||
name string
|
||||
// mib is the encoding type of this encoder.
|
||||
mib identifier.MIB
|
||||
// asciiSuperset states whether the encoding is a superset of ASCII.
|
||||
asciiSuperset bool
|
||||
// low is the lower bound of the encoded byte for a non-ASCII rune. If
|
||||
// Charmap.asciiSuperset is true then this will be 0x80, otherwise 0x00.
|
||||
low uint8
|
||||
// replacement is the encoded replacement character.
|
||||
replacement byte
|
||||
// decode is the map from encoded byte to UTF-8.
|
||||
decode [256]utf8Enc
|
||||
// encoding is the map from runes to encoded bytes. Each entry is a
|
||||
// uint32: the high 8 bits are the encoded byte and the low 24 bits are
|
||||
// the rune. The table entries are sorted by ascending rune.
|
||||
encode [256]uint32
|
||||
}
|
||||
|
||||
// NewDecoder implements the encoding.Encoding interface.
|
||||
func (m *Charmap) NewDecoder() *encoding.Decoder {
|
||||
return &encoding.Decoder{Transformer: charmapDecoder{charmap: m}}
|
||||
}
|
||||
|
||||
// NewEncoder implements the encoding.Encoding interface.
|
||||
func (m *Charmap) NewEncoder() *encoding.Encoder {
|
||||
return &encoding.Encoder{Transformer: charmapEncoder{charmap: m}}
|
||||
}
|
||||
|
||||
// String returns the Charmap's name.
|
||||
func (m *Charmap) String() string {
|
||||
return m.name
|
||||
}
|
||||
|
||||
// ID implements an internal interface.
|
||||
func (m *Charmap) ID() (mib identifier.MIB, other string) {
|
||||
return m.mib, ""
|
||||
}
|
||||
|
||||
// charmapDecoder implements transform.Transformer by decoding to UTF-8.
|
||||
type charmapDecoder struct {
|
||||
transform.NopResetter
|
||||
charmap *Charmap
|
||||
}
|
||||
|
||||
func (m charmapDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
for i, c := range src {
|
||||
if m.charmap.asciiSuperset && c < utf8.RuneSelf {
|
||||
if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = c
|
||||
nDst++
|
||||
nSrc = i + 1
|
||||
continue
|
||||
}
|
||||
|
||||
decode := &m.charmap.decode[c]
|
||||
n := int(decode.len)
|
||||
if nDst+n > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
// It's 15% faster to avoid calling copy for these tiny slices.
|
||||
for j := 0; j < n; j++ {
|
||||
dst[nDst] = decode.data[j]
|
||||
nDst++
|
||||
}
|
||||
nSrc = i + 1
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
// DecodeByte returns the Charmap's rune decoding of the byte b.
|
||||
func (m *Charmap) DecodeByte(b byte) rune {
|
||||
switch x := &m.decode[b]; x.len {
|
||||
case 1:
|
||||
return rune(x.data[0])
|
||||
case 2:
|
||||
return rune(x.data[0]&0x1f)<<6 | rune(x.data[1]&0x3f)
|
||||
default:
|
||||
return rune(x.data[0]&0x0f)<<12 | rune(x.data[1]&0x3f)<<6 | rune(x.data[2]&0x3f)
|
||||
}
|
||||
}
|
||||
|
||||
// charmapEncoder implements transform.Transformer by encoding from UTF-8.
|
||||
type charmapEncoder struct {
|
||||
transform.NopResetter
|
||||
charmap *Charmap
|
||||
}
|
||||
|
||||
func (m charmapEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
loop:
|
||||
for nSrc < len(src) {
|
||||
if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
if m.charmap.asciiSuperset {
|
||||
nSrc++
|
||||
dst[nDst] = uint8(r)
|
||||
nDst++
|
||||
continue
|
||||
}
|
||||
size = 1
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
} else {
|
||||
err = internal.RepertoireError(m.charmap.replacement)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Binary search in [low, high) for that rune in the m.charmap.encode table.
|
||||
for low, high := int(m.charmap.low), 0x100; ; {
|
||||
if low >= high {
|
||||
err = internal.RepertoireError(m.charmap.replacement)
|
||||
break loop
|
||||
}
|
||||
mid := (low + high) / 2
|
||||
got := m.charmap.encode[mid]
|
||||
gotRune := rune(got & (1<<24 - 1))
|
||||
if gotRune < r {
|
||||
low = mid + 1
|
||||
} else if gotRune > r {
|
||||
high = mid
|
||||
} else {
|
||||
dst[nDst] = byte(got >> 24)
|
||||
nDst++
|
||||
break
|
||||
}
|
||||
}
|
||||
nSrc += size
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
// EncodeRune returns the Charmap's byte encoding of the rune r. ok is whether
|
||||
// r is in the Charmap's repertoire. If not, b is set to the Charmap's
|
||||
// replacement byte. This is often the ASCII substitute character '\x1a'.
|
||||
func (m *Charmap) EncodeRune(r rune) (b byte, ok bool) {
|
||||
if r < utf8.RuneSelf && m.asciiSuperset {
|
||||
return byte(r), true
|
||||
}
|
||||
for low, high := int(m.low), 0x100; ; {
|
||||
if low >= high {
|
||||
return m.replacement, false
|
||||
}
|
||||
mid := (low + high) / 2
|
||||
got := m.encode[mid]
|
||||
gotRune := rune(got & (1<<24 - 1))
|
||||
if gotRune < r {
|
||||
low = mid + 1
|
||||
} else if gotRune > r {
|
||||
high = mid
|
||||
} else {
|
||||
return byte(got >> 24), true
|
||||
}
|
||||
}
|
||||
}
|
556
vendor/golang.org/x/text/encoding/charmap/maketables.go
generated
vendored
556
vendor/golang.org/x/text/encoding/charmap/maketables.go
generated
vendored
@ -1,556 +0,0 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/internal/gen"
|
||||
)
|
||||
|
||||
const ascii = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" +
|
||||
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
|
||||
` !"#$%&'()*+,-./0123456789:;<=>?` +
|
||||
`@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` +
|
||||
"`abcdefghijklmnopqrstuvwxyz{|}~\u007f"
|
||||
|
||||
var encodings = []struct {
|
||||
name string
|
||||
mib string
|
||||
comment string
|
||||
varName string
|
||||
replacement byte
|
||||
mapping string
|
||||
}{
|
||||
{
|
||||
"IBM Code Page 037",
|
||||
"IBM037",
|
||||
"",
|
||||
"CodePage037",
|
||||
0x3f,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM037-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 437",
|
||||
"PC8CodePage437",
|
||||
"",
|
||||
"CodePage437",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM437-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 850",
|
||||
"PC850Multilingual",
|
||||
"",
|
||||
"CodePage850",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM850-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 852",
|
||||
"PCp852",
|
||||
"",
|
||||
"CodePage852",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM852-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 855",
|
||||
"IBM855",
|
||||
"",
|
||||
"CodePage855",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM855-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"Windows Code Page 858", // PC latin1 with Euro
|
||||
"IBM00858",
|
||||
"",
|
||||
"CodePage858",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/windows-858-2000.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 860",
|
||||
"IBM860",
|
||||
"",
|
||||
"CodePage860",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM860-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 862",
|
||||
"PC862LatinHebrew",
|
||||
"",
|
||||
"CodePage862",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM862-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 863",
|
||||
"IBM863",
|
||||
"",
|
||||
"CodePage863",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM863-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 865",
|
||||
"IBM865",
|
||||
"",
|
||||
"CodePage865",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM865-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 866",
|
||||
"IBM866",
|
||||
"",
|
||||
"CodePage866",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-ibm866.txt",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 1047",
|
||||
"IBM1047",
|
||||
"",
|
||||
"CodePage1047",
|
||||
0x3f,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM1047-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 1140",
|
||||
"IBM01140",
|
||||
"",
|
||||
"CodePage1140",
|
||||
0x3f,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/ibm-1140_P100-1997.ucm",
|
||||
},
|
||||
{
|
||||
"ISO 8859-1",
|
||||
"ISOLatin1",
|
||||
"",
|
||||
"ISO8859_1",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_1-1998.ucm",
|
||||
},
|
||||
{
|
||||
"ISO 8859-2",
|
||||
"ISOLatin2",
|
||||
"",
|
||||
"ISO8859_2",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-2.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-3",
|
||||
"ISOLatin3",
|
||||
"",
|
||||
"ISO8859_3",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-3.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-4",
|
||||
"ISOLatin4",
|
||||
"",
|
||||
"ISO8859_4",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-4.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-5",
|
||||
"ISOLatinCyrillic",
|
||||
"",
|
||||
"ISO8859_5",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-5.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-6",
|
||||
"ISOLatinArabic",
|
||||
"",
|
||||
"ISO8859_6,ISO8859_6E,ISO8859_6I",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-6.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-7",
|
||||
"ISOLatinGreek",
|
||||
"",
|
||||
"ISO8859_7",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-7.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-8",
|
||||
"ISOLatinHebrew",
|
||||
"",
|
||||
"ISO8859_8,ISO8859_8E,ISO8859_8I",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-8.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-9",
|
||||
"ISOLatin5",
|
||||
"",
|
||||
"ISO8859_9",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_9-1999.ucm",
|
||||
},
|
||||
{
|
||||
"ISO 8859-10",
|
||||
"ISOLatin6",
|
||||
"",
|
||||
"ISO8859_10",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-10.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-13",
|
||||
"ISO885913",
|
||||
"",
|
||||
"ISO8859_13",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-13.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-14",
|
||||
"ISO885914",
|
||||
"",
|
||||
"ISO8859_14",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-14.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-15",
|
||||
"ISO885915",
|
||||
"",
|
||||
"ISO8859_15",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-15.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-16",
|
||||
"ISO885916",
|
||||
"",
|
||||
"ISO8859_16",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-16.txt",
|
||||
},
|
||||
{
|
||||
"KOI8-R",
|
||||
"KOI8R",
|
||||
"",
|
||||
"KOI8R",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-koi8-r.txt",
|
||||
},
|
||||
{
|
||||
"KOI8-U",
|
||||
"KOI8U",
|
||||
"",
|
||||
"KOI8U",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-koi8-u.txt",
|
||||
},
|
||||
{
|
||||
"Macintosh",
|
||||
"Macintosh",
|
||||
"",
|
||||
"Macintosh",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-macintosh.txt",
|
||||
},
|
||||
{
|
||||
"Macintosh Cyrillic",
|
||||
"MacintoshCyrillic",
|
||||
"",
|
||||
"MacintoshCyrillic",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-x-mac-cyrillic.txt",
|
||||
},
|
||||
{
|
||||
"Windows 874",
|
||||
"Windows874",
|
||||
"",
|
||||
"Windows874",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-874.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1250",
|
||||
"Windows1250",
|
||||
"",
|
||||
"Windows1250",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1250.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1251",
|
||||
"Windows1251",
|
||||
"",
|
||||
"Windows1251",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1251.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1252",
|
||||
"Windows1252",
|
||||
"",
|
||||
"Windows1252",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1252.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1253",
|
||||
"Windows1253",
|
||||
"",
|
||||
"Windows1253",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1253.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1254",
|
||||
"Windows1254",
|
||||
"",
|
||||
"Windows1254",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1254.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1255",
|
||||
"Windows1255",
|
||||
"",
|
||||
"Windows1255",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1255.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1256",
|
||||
"Windows1256",
|
||||
"",
|
||||
"Windows1256",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1256.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1257",
|
||||
"Windows1257",
|
||||
"",
|
||||
"Windows1257",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1257.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1258",
|
||||
"Windows1258",
|
||||
"",
|
||||
"Windows1258",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1258.txt",
|
||||
},
|
||||
{
|
||||
"X-User-Defined",
|
||||
"XUserDefined",
|
||||
"It is defined at http://encoding.spec.whatwg.org/#x-user-defined",
|
||||
"XUserDefined",
|
||||
encoding.ASCIISub,
|
||||
ascii +
|
||||
"\uf780\uf781\uf782\uf783\uf784\uf785\uf786\uf787" +
|
||||
"\uf788\uf789\uf78a\uf78b\uf78c\uf78d\uf78e\uf78f" +
|
||||
"\uf790\uf791\uf792\uf793\uf794\uf795\uf796\uf797" +
|
||||
"\uf798\uf799\uf79a\uf79b\uf79c\uf79d\uf79e\uf79f" +
|
||||
"\uf7a0\uf7a1\uf7a2\uf7a3\uf7a4\uf7a5\uf7a6\uf7a7" +
|
||||
"\uf7a8\uf7a9\uf7aa\uf7ab\uf7ac\uf7ad\uf7ae\uf7af" +
|
||||
"\uf7b0\uf7b1\uf7b2\uf7b3\uf7b4\uf7b5\uf7b6\uf7b7" +
|
||||
"\uf7b8\uf7b9\uf7ba\uf7bb\uf7bc\uf7bd\uf7be\uf7bf" +
|
||||
"\uf7c0\uf7c1\uf7c2\uf7c3\uf7c4\uf7c5\uf7c6\uf7c7" +
|
||||
"\uf7c8\uf7c9\uf7ca\uf7cb\uf7cc\uf7cd\uf7ce\uf7cf" +
|
||||
"\uf7d0\uf7d1\uf7d2\uf7d3\uf7d4\uf7d5\uf7d6\uf7d7" +
|
||||
"\uf7d8\uf7d9\uf7da\uf7db\uf7dc\uf7dd\uf7de\uf7df" +
|
||||
"\uf7e0\uf7e1\uf7e2\uf7e3\uf7e4\uf7e5\uf7e6\uf7e7" +
|
||||
"\uf7e8\uf7e9\uf7ea\uf7eb\uf7ec\uf7ed\uf7ee\uf7ef" +
|
||||
"\uf7f0\uf7f1\uf7f2\uf7f3\uf7f4\uf7f5\uf7f6\uf7f7" +
|
||||
"\uf7f8\uf7f9\uf7fa\uf7fb\uf7fc\uf7fd\uf7fe\uf7ff",
|
||||
},
|
||||
}
|
||||
|
||||
func getWHATWG(url string) string {
|
||||
res, err := http.Get(url)
|
||||
if err != nil {
|
||||
log.Fatalf("%q: Get: %v", url, err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
mapping := make([]rune, 128)
|
||||
for i := range mapping {
|
||||
mapping[i] = '\ufffd'
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
x, y := 0, 0
|
||||
if _, err := fmt.Sscanf(s, "%d\t0x%x", &x, &y); err != nil {
|
||||
log.Fatalf("could not parse %q", s)
|
||||
}
|
||||
if x < 0 || 128 <= x {
|
||||
log.Fatalf("code %d is out of range", x)
|
||||
}
|
||||
if 0x80 <= y && y < 0xa0 {
|
||||
// We diverge from the WHATWG spec by mapping control characters
|
||||
// in the range [0x80, 0xa0) to U+FFFD.
|
||||
continue
|
||||
}
|
||||
mapping[x] = rune(y)
|
||||
}
|
||||
return ascii + string(mapping)
|
||||
}
|
||||
|
||||
func getUCM(url string) string {
|
||||
res, err := http.Get(url)
|
||||
if err != nil {
|
||||
log.Fatalf("%q: Get: %v", url, err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
mapping := make([]rune, 256)
|
||||
for i := range mapping {
|
||||
mapping[i] = '\ufffd'
|
||||
}
|
||||
|
||||
charsFound := 0
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
var c byte
|
||||
var r rune
|
||||
if _, err := fmt.Sscanf(s, `<U%x> \x%x |0`, &r, &c); err != nil {
|
||||
continue
|
||||
}
|
||||
mapping[c] = r
|
||||
charsFound++
|
||||
}
|
||||
|
||||
if charsFound < 200 {
|
||||
log.Fatalf("%q: only %d characters found (wrong page format?)", url, charsFound)
|
||||
}
|
||||
|
||||
return string(mapping)
|
||||
}
|
||||
|
||||
func main() {
|
||||
mibs := map[string]bool{}
|
||||
all := []string{}
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile("tables.go", "charmap")
|
||||
|
||||
printf := func(s string, a ...interface{}) { fmt.Fprintf(w, s, a...) }
|
||||
|
||||
printf("import (\n")
|
||||
printf("\t\"golang.org/x/text/encoding\"\n")
|
||||
printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n")
|
||||
printf(")\n\n")
|
||||
for _, e := range encodings {
|
||||
varNames := strings.Split(e.varName, ",")
|
||||
all = append(all, varNames...)
|
||||
varName := varNames[0]
|
||||
switch {
|
||||
case strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/"):
|
||||
e.mapping = getWHATWG(e.mapping)
|
||||
case strings.HasPrefix(e.mapping, "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/"):
|
||||
e.mapping = getUCM(e.mapping)
|
||||
}
|
||||
|
||||
asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00
|
||||
if asciiSuperset {
|
||||
low = 0x80
|
||||
}
|
||||
lvn := 1
|
||||
if strings.HasPrefix(varName, "ISO") || strings.HasPrefix(varName, "KOI") {
|
||||
lvn = 3
|
||||
}
|
||||
lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:]
|
||||
printf("// %s is the %s encoding.\n", varName, e.name)
|
||||
if e.comment != "" {
|
||||
printf("//\n// %s\n", e.comment)
|
||||
}
|
||||
printf("var %s *Charmap = &%s\n\nvar %s = Charmap{\nname: %q,\n",
|
||||
varName, lowerVarName, lowerVarName, e.name)
|
||||
if mibs[e.mib] {
|
||||
log.Fatalf("MIB type %q declared multiple times.", e.mib)
|
||||
}
|
||||
printf("mib: identifier.%s,\n", e.mib)
|
||||
printf("asciiSuperset: %t,\n", asciiSuperset)
|
||||
printf("low: 0x%02x,\n", low)
|
||||
printf("replacement: 0x%02x,\n", e.replacement)
|
||||
|
||||
printf("decode: [256]utf8Enc{\n")
|
||||
i, backMapping := 0, map[rune]byte{}
|
||||
for _, c := range e.mapping {
|
||||
if _, ok := backMapping[c]; !ok && c != utf8.RuneError {
|
||||
backMapping[c] = byte(i)
|
||||
}
|
||||
var buf [8]byte
|
||||
n := utf8.EncodeRune(buf[:], c)
|
||||
if n > 3 {
|
||||
panic(fmt.Sprintf("rune %q (%U) is too long", c, c))
|
||||
}
|
||||
printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2])
|
||||
if i%2 == 1 {
|
||||
printf("\n")
|
||||
}
|
||||
i++
|
||||
}
|
||||
printf("},\n")
|
||||
|
||||
printf("encode: [256]uint32{\n")
|
||||
encode := make([]uint32, 0, 256)
|
||||
for c, i := range backMapping {
|
||||
encode = append(encode, uint32(i)<<24|uint32(c))
|
||||
}
|
||||
sort.Sort(byRune(encode))
|
||||
for len(encode) < cap(encode) {
|
||||
encode = append(encode, encode[len(encode)-1])
|
||||
}
|
||||
for i, enc := range encode {
|
||||
printf("0x%08x,", enc)
|
||||
if i%8 == 7 {
|
||||
printf("\n")
|
||||
}
|
||||
}
|
||||
printf("},\n}\n")
|
||||
|
||||
// Add an estimate of the size of a single Charmap{} struct value, which
|
||||
// includes two 256 elem arrays of 4 bytes and some extra fields, which
|
||||
// align to 3 uint64s on 64-bit architectures.
|
||||
w.Size += 2*4*256 + 3*8
|
||||
}
|
||||
// TODO: add proper line breaking.
|
||||
printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n"))
|
||||
}
|
||||
|
||||
type byRune []uint32
|
||||
|
||||
func (b byRune) Len() int { return len(b) }
|
||||
func (b byRune) Less(i, j int) bool { return b[i]&0xffffff < b[j]&0xffffff }
|
||||
func (b byRune) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
7410
vendor/golang.org/x/text/encoding/charmap/tables.go
generated
vendored
7410
vendor/golang.org/x/text/encoding/charmap/tables.go
generated
vendored
File diff suppressed because it is too large
Load Diff
173
vendor/golang.org/x/text/encoding/htmlindex/gen.go
generated
vendored
173
vendor/golang.org/x/text/encoding/htmlindex/gen.go
generated
vendored
@ -1,173 +0,0 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
)
|
||||
|
||||
type group struct {
|
||||
Encodings []struct {
|
||||
Labels []string
|
||||
Name string
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
|
||||
r := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json")
|
||||
var groups []group
|
||||
if err := json.NewDecoder(r).Decode(&groups); err != nil {
|
||||
log.Fatalf("Error reading encodings.json: %v", err)
|
||||
}
|
||||
|
||||
w := &bytes.Buffer{}
|
||||
fmt.Fprintln(w, "type htmlEncoding byte")
|
||||
fmt.Fprintln(w, "const (")
|
||||
for i, g := range groups {
|
||||
for _, e := range g.Encodings {
|
||||
key := strings.ToLower(e.Name)
|
||||
name := consts[key]
|
||||
if name == "" {
|
||||
log.Fatalf("No const defined for %s.", key)
|
||||
}
|
||||
if i == 0 {
|
||||
fmt.Fprintf(w, "%s htmlEncoding = iota\n", name)
|
||||
} else {
|
||||
fmt.Fprintf(w, "%s\n", name)
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Fprintln(w, "numEncodings")
|
||||
fmt.Fprint(w, ")\n\n")
|
||||
|
||||
fmt.Fprintln(w, "var canonical = [numEncodings]string{")
|
||||
for _, g := range groups {
|
||||
for _, e := range g.Encodings {
|
||||
fmt.Fprintf(w, "%q,\n", strings.ToLower(e.Name))
|
||||
}
|
||||
}
|
||||
fmt.Fprint(w, "}\n\n")
|
||||
|
||||
fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{")
|
||||
for _, g := range groups {
|
||||
for _, e := range g.Encodings {
|
||||
for _, l := range e.Labels {
|
||||
key := strings.ToLower(e.Name)
|
||||
name := consts[key]
|
||||
fmt.Fprintf(w, "%q: %s,\n", l, name)
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Fprint(w, "}\n\n")
|
||||
|
||||
var tags []string
|
||||
fmt.Fprintln(w, "var localeMap = []htmlEncoding{")
|
||||
for _, loc := range locales {
|
||||
tags = append(tags, loc.tag)
|
||||
fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag)
|
||||
}
|
||||
fmt.Fprint(w, "}\n\n")
|
||||
|
||||
fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " "))
|
||||
|
||||
gen.WriteGoFile("tables.go", "htmlindex", w.Bytes())
|
||||
}
|
||||
|
||||
// consts maps canonical encoding name to internal constant.
|
||||
var consts = map[string]string{
|
||||
"utf-8": "utf8",
|
||||
"ibm866": "ibm866",
|
||||
"iso-8859-2": "iso8859_2",
|
||||
"iso-8859-3": "iso8859_3",
|
||||
"iso-8859-4": "iso8859_4",
|
||||
"iso-8859-5": "iso8859_5",
|
||||
"iso-8859-6": "iso8859_6",
|
||||
"iso-8859-7": "iso8859_7",
|
||||
"iso-8859-8": "iso8859_8",
|
||||
"iso-8859-8-i": "iso8859_8I",
|
||||
"iso-8859-10": "iso8859_10",
|
||||
"iso-8859-13": "iso8859_13",
|
||||
"iso-8859-14": "iso8859_14",
|
||||
"iso-8859-15": "iso8859_15",
|
||||
"iso-8859-16": "iso8859_16",
|
||||
"koi8-r": "koi8r",
|
||||
"koi8-u": "koi8u",
|
||||
"macintosh": "macintosh",
|
||||
"windows-874": "windows874",
|
||||
"windows-1250": "windows1250",
|
||||
"windows-1251": "windows1251",
|
||||
"windows-1252": "windows1252",
|
||||
"windows-1253": "windows1253",
|
||||
"windows-1254": "windows1254",
|
||||
"windows-1255": "windows1255",
|
||||
"windows-1256": "windows1256",
|
||||
"windows-1257": "windows1257",
|
||||
"windows-1258": "windows1258",
|
||||
"x-mac-cyrillic": "macintoshCyrillic",
|
||||
"gbk": "gbk",
|
||||
"gb18030": "gb18030",
|
||||
// "hz-gb-2312": "hzgb2312", // Was removed from WhatWG
|
||||
"big5": "big5",
|
||||
"euc-jp": "eucjp",
|
||||
"iso-2022-jp": "iso2022jp",
|
||||
"shift_jis": "shiftJIS",
|
||||
"euc-kr": "euckr",
|
||||
"replacement": "replacement",
|
||||
"utf-16be": "utf16be",
|
||||
"utf-16le": "utf16le",
|
||||
"x-user-defined": "xUserDefined",
|
||||
}
|
||||
|
||||
// locales is taken from
|
||||
// https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm.
|
||||
var locales = []struct{ tag, name string }{
|
||||
// The default value. Explicitly state latin to benefit from the exact
|
||||
// script option, while still making 1252 the default encoding for languages
|
||||
// written in Latin script.
|
||||
{"und_Latn", "windows-1252"},
|
||||
{"ar", "windows-1256"},
|
||||
{"ba", "windows-1251"},
|
||||
{"be", "windows-1251"},
|
||||
{"bg", "windows-1251"},
|
||||
{"cs", "windows-1250"},
|
||||
{"el", "iso-8859-7"},
|
||||
{"et", "windows-1257"},
|
||||
{"fa", "windows-1256"},
|
||||
{"he", "windows-1255"},
|
||||
{"hr", "windows-1250"},
|
||||
{"hu", "iso-8859-2"},
|
||||
{"ja", "shift_jis"},
|
||||
{"kk", "windows-1251"},
|
||||
{"ko", "euc-kr"},
|
||||
{"ku", "windows-1254"},
|
||||
{"ky", "windows-1251"},
|
||||
{"lt", "windows-1257"},
|
||||
{"lv", "windows-1257"},
|
||||
{"mk", "windows-1251"},
|
||||
{"pl", "iso-8859-2"},
|
||||
{"ru", "windows-1251"},
|
||||
{"sah", "windows-1251"},
|
||||
{"sk", "windows-1250"},
|
||||
{"sl", "iso-8859-2"},
|
||||
{"sr", "windows-1251"},
|
||||
{"tg", "windows-1251"},
|
||||
{"th", "windows-874"},
|
||||
{"tr", "windows-1254"},
|
||||
{"tt", "windows-1251"},
|
||||
{"uk", "windows-1251"},
|
||||
{"vi", "windows-1258"},
|
||||
{"zh-hans", "gb18030"},
|
||||
{"zh-hant", "big5"},
|
||||
}
|
86
vendor/golang.org/x/text/encoding/htmlindex/htmlindex.go
generated
vendored
86
vendor/golang.org/x/text/encoding/htmlindex/htmlindex.go
generated
vendored
@ -1,86 +0,0 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go
|
||||
|
||||
// Package htmlindex maps character set encoding names to Encodings as
|
||||
// recommended by the W3C for use in HTML 5. See http://www.w3.org/TR/encoding.
|
||||
package htmlindex
|
||||
|
||||
// TODO: perhaps have a "bare" version of the index (used by this package) that
|
||||
// is not pre-loaded with all encodings. Global variables in encodings prevent
|
||||
// the linker from being able to purge unneeded tables. This means that
|
||||
// referencing all encodings, as this package does for the default index, links
|
||||
// in all encodings unconditionally.
|
||||
//
|
||||
// This issue can be solved by either solving the linking issue (see
|
||||
// https://github.com/golang/go/issues/6330) or refactoring the encoding tables
|
||||
// (e.g. moving the tables to internal packages that do not use global
|
||||
// variables).
|
||||
|
||||
// TODO: allow canonicalizing names
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
var (
|
||||
errInvalidName = errors.New("htmlindex: invalid encoding name")
|
||||
errUnknown = errors.New("htmlindex: unknown Encoding")
|
||||
errUnsupported = errors.New("htmlindex: this encoding is not supported")
|
||||
)
|
||||
|
||||
var (
|
||||
matcherOnce sync.Once
|
||||
matcher language.Matcher
|
||||
)
|
||||
|
||||
// LanguageDefault returns the canonical name of the default encoding for a
|
||||
// given language.
|
||||
func LanguageDefault(tag language.Tag) string {
|
||||
matcherOnce.Do(func() {
|
||||
tags := []language.Tag{}
|
||||
for _, t := range strings.Split(locales, " ") {
|
||||
tags = append(tags, language.MustParse(t))
|
||||
}
|
||||
matcher = language.NewMatcher(tags, language.PreferSameScript(true))
|
||||
})
|
||||
_, i, _ := matcher.Match(tag)
|
||||
return canonical[localeMap[i]] // Default is Windows-1252.
|
||||
}
|
||||
|
||||
// Get returns an Encoding for one of the names listed in
|
||||
// http://www.w3.org/TR/encoding using the Default Index. Matching is case-
|
||||
// insensitive.
|
||||
func Get(name string) (encoding.Encoding, error) {
|
||||
x, ok := nameMap[strings.ToLower(strings.TrimSpace(name))]
|
||||
if !ok {
|
||||
return nil, errInvalidName
|
||||
}
|
||||
return encodings[x], nil
|
||||
}
|
||||
|
||||
// Name reports the canonical name of the given Encoding. It will return
|
||||
// an error if e is not associated with a supported encoding scheme.
|
||||
func Name(e encoding.Encoding) (string, error) {
|
||||
id, ok := e.(identifier.Interface)
|
||||
if !ok {
|
||||
return "", errUnknown
|
||||
}
|
||||
mib, _ := id.ID()
|
||||
if mib == 0 {
|
||||
return "", errUnknown
|
||||
}
|
||||
v, ok := mibMap[mib]
|
||||
if !ok {
|
||||
return "", errUnsupported
|
||||
}
|
||||
return canonical[v], nil
|
||||
}
|
105
vendor/golang.org/x/text/encoding/htmlindex/map.go
generated
vendored
105
vendor/golang.org/x/text/encoding/htmlindex/map.go
generated
vendored
@ -1,105 +0,0 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package htmlindex
|
||||
|
||||
import (
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/charmap"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/encoding/japanese"
|
||||
"golang.org/x/text/encoding/korean"
|
||||
"golang.org/x/text/encoding/simplifiedchinese"
|
||||
"golang.org/x/text/encoding/traditionalchinese"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
)
|
||||
|
||||
// mibMap maps a MIB identifier to an htmlEncoding index.
|
||||
var mibMap = map[identifier.MIB]htmlEncoding{
|
||||
identifier.UTF8: utf8,
|
||||
identifier.UTF16BE: utf16be,
|
||||
identifier.UTF16LE: utf16le,
|
||||
identifier.IBM866: ibm866,
|
||||
identifier.ISOLatin2: iso8859_2,
|
||||
identifier.ISOLatin3: iso8859_3,
|
||||
identifier.ISOLatin4: iso8859_4,
|
||||
identifier.ISOLatinCyrillic: iso8859_5,
|
||||
identifier.ISOLatinArabic: iso8859_6,
|
||||
identifier.ISOLatinGreek: iso8859_7,
|
||||
identifier.ISOLatinHebrew: iso8859_8,
|
||||
identifier.ISO88598I: iso8859_8I,
|
||||
identifier.ISOLatin6: iso8859_10,
|
||||
identifier.ISO885913: iso8859_13,
|
||||
identifier.ISO885914: iso8859_14,
|
||||
identifier.ISO885915: iso8859_15,
|
||||
identifier.ISO885916: iso8859_16,
|
||||
identifier.KOI8R: koi8r,
|
||||
identifier.KOI8U: koi8u,
|
||||
identifier.Macintosh: macintosh,
|
||||
identifier.MacintoshCyrillic: macintoshCyrillic,
|
||||
identifier.Windows874: windows874,
|
||||
identifier.Windows1250: windows1250,
|
||||
identifier.Windows1251: windows1251,
|
||||
identifier.Windows1252: windows1252,
|
||||
identifier.Windows1253: windows1253,
|
||||
identifier.Windows1254: windows1254,
|
||||
identifier.Windows1255: windows1255,
|
||||
identifier.Windows1256: windows1256,
|
||||
identifier.Windows1257: windows1257,
|
||||
identifier.Windows1258: windows1258,
|
||||
identifier.XUserDefined: xUserDefined,
|
||||
identifier.GBK: gbk,
|
||||
identifier.GB18030: gb18030,
|
||||
identifier.Big5: big5,
|
||||
identifier.EUCPkdFmtJapanese: eucjp,
|
||||
identifier.ISO2022JP: iso2022jp,
|
||||
identifier.ShiftJIS: shiftJIS,
|
||||
identifier.EUCKR: euckr,
|
||||
identifier.Replacement: replacement,
|
||||
}
|
||||
|
||||
// encodings maps the internal htmlEncoding to an Encoding.
|
||||
// TODO: consider using a reusable index in encoding/internal.
|
||||
var encodings = [numEncodings]encoding.Encoding{
|
||||
utf8: unicode.UTF8,
|
||||
ibm866: charmap.CodePage866,
|
||||
iso8859_2: charmap.ISO8859_2,
|
||||
iso8859_3: charmap.ISO8859_3,
|
||||
iso8859_4: charmap.ISO8859_4,
|
||||
iso8859_5: charmap.ISO8859_5,
|
||||
iso8859_6: charmap.ISO8859_6,
|
||||
iso8859_7: charmap.ISO8859_7,
|
||||
iso8859_8: charmap.ISO8859_8,
|
||||
iso8859_8I: charmap.ISO8859_8I,
|
||||
iso8859_10: charmap.ISO8859_10,
|
||||
iso8859_13: charmap.ISO8859_13,
|
||||
iso8859_14: charmap.ISO8859_14,
|
||||
iso8859_15: charmap.ISO8859_15,
|
||||
iso8859_16: charmap.ISO8859_16,
|
||||
koi8r: charmap.KOI8R,
|
||||
koi8u: charmap.KOI8U,
|
||||
macintosh: charmap.Macintosh,
|
||||
windows874: charmap.Windows874,
|
||||
windows1250: charmap.Windows1250,
|
||||
windows1251: charmap.Windows1251,
|
||||
windows1252: charmap.Windows1252,
|
||||
windows1253: charmap.Windows1253,
|
||||
windows1254: charmap.Windows1254,
|
||||
windows1255: charmap.Windows1255,
|
||||
windows1256: charmap.Windows1256,
|
||||
windows1257: charmap.Windows1257,
|
||||
windows1258: charmap.Windows1258,
|
||||
macintoshCyrillic: charmap.MacintoshCyrillic,
|
||||
gbk: simplifiedchinese.GBK,
|
||||
gb18030: simplifiedchinese.GB18030,
|
||||
big5: traditionalchinese.Big5,
|
||||
eucjp: japanese.EUCJP,
|
||||
iso2022jp: japanese.ISO2022JP,
|
||||
shiftJIS: japanese.ShiftJIS,
|
||||
euckr: korean.EUCKR,
|
||||
replacement: encoding.Replacement,
|
||||
utf16be: unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
|
||||
utf16le: unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM),
|
||||
xUserDefined: charmap.XUserDefined,
|
||||
}
|
352
vendor/golang.org/x/text/encoding/htmlindex/tables.go
generated
vendored
352
vendor/golang.org/x/text/encoding/htmlindex/tables.go
generated
vendored
@ -1,352 +0,0 @@
|
||||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package htmlindex
|
||||
|
||||
type htmlEncoding byte
|
||||
|
||||
const (
|
||||
utf8 htmlEncoding = iota
|
||||
ibm866
|
||||
iso8859_2
|
||||
iso8859_3
|
||||
iso8859_4
|
||||
iso8859_5
|
||||
iso8859_6
|
||||
iso8859_7
|
||||
iso8859_8
|
||||
iso8859_8I
|
||||
iso8859_10
|
||||
iso8859_13
|
||||
iso8859_14
|
||||
iso8859_15
|
||||
iso8859_16
|
||||
koi8r
|
||||
koi8u
|
||||
macintosh
|
||||
windows874
|
||||
windows1250
|
||||
windows1251
|
||||
windows1252
|
||||
windows1253
|
||||
windows1254
|
||||
windows1255
|
||||
windows1256
|
||||
windows1257
|
||||
windows1258
|
||||
macintoshCyrillic
|
||||
gbk
|
||||
gb18030
|
||||
big5
|
||||
eucjp
|
||||
iso2022jp
|
||||
shiftJIS
|
||||
euckr
|
||||
replacement
|
||||
utf16be
|
||||
utf16le
|
||||
xUserDefined
|
||||
numEncodings
|
||||
)
|
||||
|
||||
var canonical = [numEncodings]string{
|
||||
"utf-8",
|
||||
"ibm866",
|
||||
"iso-8859-2",
|
||||
"iso-8859-3",
|
||||
"iso-8859-4",
|
||||
"iso-8859-5",
|
||||
"iso-8859-6",
|
||||
"iso-8859-7",
|
||||
"iso-8859-8",
|
||||
"iso-8859-8-i",
|
||||
"iso-8859-10",
|
||||
"iso-8859-13",
|
||||
"iso-8859-14",
|
||||
"iso-8859-15",
|
||||
"iso-8859-16",
|
||||
"koi8-r",
|
||||
"koi8-u",
|
||||
"macintosh",
|
||||
"windows-874",
|
||||
"windows-1250",
|
||||
"windows-1251",
|
||||
"windows-1252",
|
||||
"windows-1253",
|
||||
"windows-1254",
|
||||
"windows-1255",
|
||||
"windows-1256",
|
||||
"windows-1257",
|
||||
"windows-1258",
|
||||
"x-mac-cyrillic",
|
||||
"gbk",
|
||||
"gb18030",
|
||||
"big5",
|
||||
"euc-jp",
|
||||
"iso-2022-jp",
|
||||
"shift_jis",
|
||||
"euc-kr",
|
||||
"replacement",
|
||||
"utf-16be",
|
||||
"utf-16le",
|
||||
"x-user-defined",
|
||||
}
|
||||
|
||||
var nameMap = map[string]htmlEncoding{
|
||||
"unicode-1-1-utf-8": utf8,
|
||||
"utf-8": utf8,
|
||||
"utf8": utf8,
|
||||
"866": ibm866,
|
||||
"cp866": ibm866,
|
||||
"csibm866": ibm866,
|
||||
"ibm866": ibm866,
|
||||
"csisolatin2": iso8859_2,
|
||||
"iso-8859-2": iso8859_2,
|
||||
"iso-ir-101": iso8859_2,
|
||||
"iso8859-2": iso8859_2,
|
||||
"iso88592": iso8859_2,
|
||||
"iso_8859-2": iso8859_2,
|
||||
"iso_8859-2:1987": iso8859_2,
|
||||
"l2": iso8859_2,
|
||||
"latin2": iso8859_2,
|
||||
"csisolatin3": iso8859_3,
|
||||
"iso-8859-3": iso8859_3,
|
||||
"iso-ir-109": iso8859_3,
|
||||
"iso8859-3": iso8859_3,
|
||||
"iso88593": iso8859_3,
|
||||
"iso_8859-3": iso8859_3,
|
||||
"iso_8859-3:1988": iso8859_3,
|
||||
"l3": iso8859_3,
|
||||
"latin3": iso8859_3,
|
||||
"csisolatin4": iso8859_4,
|
||||
"iso-8859-4": iso8859_4,
|
||||
"iso-ir-110": iso8859_4,
|
||||
"iso8859-4": iso8859_4,
|
||||
"iso88594": iso8859_4,
|
||||
"iso_8859-4": iso8859_4,
|
||||
"iso_8859-4:1988": iso8859_4,
|
||||
"l4": iso8859_4,
|
||||
"latin4": iso8859_4,
|
||||
"csisolatincyrillic": iso8859_5,
|
||||
"cyrillic": iso8859_5,
|
||||
"iso-8859-5": iso8859_5,
|
||||
"iso-ir-144": iso8859_5,
|
||||
"iso8859-5": iso8859_5,
|
||||
"iso88595": iso8859_5,
|
||||
"iso_8859-5": iso8859_5,
|
||||
"iso_8859-5:1988": iso8859_5,
|
||||
"arabic": iso8859_6,
|
||||
"asmo-708": iso8859_6,
|
||||
"csiso88596e": iso8859_6,
|
||||
"csiso88596i": iso8859_6,
|
||||
"csisolatinarabic": iso8859_6,
|
||||
"ecma-114": iso8859_6,
|
||||
"iso-8859-6": iso8859_6,
|
||||
"iso-8859-6-e": iso8859_6,
|
||||
"iso-8859-6-i": iso8859_6,
|
||||
"iso-ir-127": iso8859_6,
|
||||
"iso8859-6": iso8859_6,
|
||||
"iso88596": iso8859_6,
|
||||
"iso_8859-6": iso8859_6,
|
||||
"iso_8859-6:1987": iso8859_6,
|
||||
"csisolatingreek": iso8859_7,
|
||||
"ecma-118": iso8859_7,
|
||||
"elot_928": iso8859_7,
|
||||
"greek": iso8859_7,
|
||||
"greek8": iso8859_7,
|
||||
"iso-8859-7": iso8859_7,
|
||||
"iso-ir-126": iso8859_7,
|
||||
"iso8859-7": iso8859_7,
|
||||
"iso88597": iso8859_7,
|
||||
"iso_8859-7": iso8859_7,
|
||||
"iso_8859-7:1987": iso8859_7,
|
||||
"sun_eu_greek": iso8859_7,
|
||||
"csiso88598e": iso8859_8,
|
||||
"csisolatinhebrew": iso8859_8,
|
||||
"hebrew": iso8859_8,
|
||||
"iso-8859-8": iso8859_8,
|
||||
"iso-8859-8-e": iso8859_8,
|
||||
"iso-ir-138": iso8859_8,
|
||||
"iso8859-8": iso8859_8,
|
||||
"iso88598": iso8859_8,
|
||||
"iso_8859-8": iso8859_8,
|
||||
"iso_8859-8:1988": iso8859_8,
|
||||
"visual": iso8859_8,
|
||||
"csiso88598i": iso8859_8I,
|
||||
"iso-8859-8-i": iso8859_8I,
|
||||
"logical": iso8859_8I,
|
||||
"csisolatin6": iso8859_10,
|
||||
"iso-8859-10": iso8859_10,
|
||||
"iso-ir-157": iso8859_10,
|
||||
"iso8859-10": iso8859_10,
|
||||
"iso885910": iso8859_10,
|
||||
"l6": iso8859_10,
|
||||
"latin6": iso8859_10,
|
||||
"iso-8859-13": iso8859_13,
|
||||
"iso8859-13": iso8859_13,
|
||||
"iso885913": iso8859_13,
|
||||
"iso-8859-14": iso8859_14,
|
||||
"iso8859-14": iso8859_14,
|
||||
"iso885914": iso8859_14,
|
||||
"csisolatin9": iso8859_15,
|
||||
"iso-8859-15": iso8859_15,
|
||||
"iso8859-15": iso8859_15,
|
||||
"iso885915": iso8859_15,
|
||||
"iso_8859-15": iso8859_15,
|
||||
"l9": iso8859_15,
|
||||
"iso-8859-16": iso8859_16,
|
||||
"cskoi8r": koi8r,
|
||||
"koi": koi8r,
|
||||
"koi8": koi8r,
|
||||
"koi8-r": koi8r,
|
||||
"koi8_r": koi8r,
|
||||
"koi8-ru": koi8u,
|
||||
"koi8-u": koi8u,
|
||||
"csmacintosh": macintosh,
|
||||
"mac": macintosh,
|
||||
"macintosh": macintosh,
|
||||
"x-mac-roman": macintosh,
|
||||
"dos-874": windows874,
|
||||
"iso-8859-11": windows874,
|
||||
"iso8859-11": windows874,
|
||||
"iso885911": windows874,
|
||||
"tis-620": windows874,
|
||||
"windows-874": windows874,
|
||||
"cp1250": windows1250,
|
||||
"windows-1250": windows1250,
|
||||
"x-cp1250": windows1250,
|
||||
"cp1251": windows1251,
|
||||
"windows-1251": windows1251,
|
||||
"x-cp1251": windows1251,
|
||||
"ansi_x3.4-1968": windows1252,
|
||||
"ascii": windows1252,
|
||||
"cp1252": windows1252,
|
||||
"cp819": windows1252,
|
||||
"csisolatin1": windows1252,
|
||||
"ibm819": windows1252,
|
||||
"iso-8859-1": windows1252,
|
||||
"iso-ir-100": windows1252,
|
||||
"iso8859-1": windows1252,
|
||||
"iso88591": windows1252,
|
||||
"iso_8859-1": windows1252,
|
||||
"iso_8859-1:1987": windows1252,
|
||||
"l1": windows1252,
|
||||
"latin1": windows1252,
|
||||
"us-ascii": windows1252,
|
||||
"windows-1252": windows1252,
|
||||
"x-cp1252": windows1252,
|
||||
"cp1253": windows1253,
|
||||
"windows-1253": windows1253,
|
||||
"x-cp1253": windows1253,
|
||||
"cp1254": windows1254,
|
||||
"csisolatin5": windows1254,
|
||||
"iso-8859-9": windows1254,
|
||||
"iso-ir-148": windows1254,
|
||||
"iso8859-9": windows1254,
|
||||
"iso88599": windows1254,
|
||||
"iso_8859-9": windows1254,
|
||||
"iso_8859-9:1989": windows1254,
|
||||
"l5": windows1254,
|
||||
"latin5": windows1254,
|
||||
"windows-1254": windows1254,
|
||||
"x-cp1254": windows1254,
|
||||
"cp1255": windows1255,
|
||||
"windows-1255": windows1255,
|
||||
"x-cp1255": windows1255,
|
||||
"cp1256": windows1256,
|
||||
"windows-1256": windows1256,
|
||||
"x-cp1256": windows1256,
|
||||
"cp1257": windows1257,
|
||||
"windows-1257": windows1257,
|
||||
"x-cp1257": windows1257,
|
||||
"cp1258": windows1258,
|
||||
"windows-1258": windows1258,
|
||||
"x-cp1258": windows1258,
|
||||
"x-mac-cyrillic": macintoshCyrillic,
|
||||
"x-mac-ukrainian": macintoshCyrillic,
|
||||
"chinese": gbk,
|
||||
"csgb2312": gbk,
|
||||
"csiso58gb231280": gbk,
|
||||
"gb2312": gbk,
|
||||
"gb_2312": gbk,
|
||||
"gb_2312-80": gbk,
|
||||
"gbk": gbk,
|
||||
"iso-ir-58": gbk,
|
||||
"x-gbk": gbk,
|
||||
"gb18030": gb18030,
|
||||
"big5": big5,
|
||||
"big5-hkscs": big5,
|
||||
"cn-big5": big5,
|
||||
"csbig5": big5,
|
||||
"x-x-big5": big5,
|
||||
"cseucpkdfmtjapanese": eucjp,
|
||||
"euc-jp": eucjp,
|
||||
"x-euc-jp": eucjp,
|
||||
"csiso2022jp": iso2022jp,
|
||||
"iso-2022-jp": iso2022jp,
|
||||
"csshiftjis": shiftJIS,
|
||||
"ms932": shiftJIS,
|
||||
"ms_kanji": shiftJIS,
|
||||
"shift-jis": shiftJIS,
|
||||
"shift_jis": shiftJIS,
|
||||
"sjis": shiftJIS,
|
||||
"windows-31j": shiftJIS,
|
||||
"x-sjis": shiftJIS,
|
||||
"cseuckr": euckr,
|
||||
"csksc56011987": euckr,
|
||||
"euc-kr": euckr,
|
||||
"iso-ir-149": euckr,
|
||||
"korean": euckr,
|
||||
"ks_c_5601-1987": euckr,
|
||||
"ks_c_5601-1989": euckr,
|
||||
"ksc5601": euckr,
|
||||
"ksc_5601": euckr,
|
||||
"windows-949": euckr,
|
||||
"csiso2022kr": replacement,
|
||||
"hz-gb-2312": replacement,
|
||||
"iso-2022-cn": replacement,
|
||||
"iso-2022-cn-ext": replacement,
|
||||
"iso-2022-kr": replacement,
|
||||
"utf-16be": utf16be,
|
||||
"utf-16": utf16le,
|
||||
"utf-16le": utf16le,
|
||||
"x-user-defined": xUserDefined,
|
||||
}
|
||||
|
||||
var localeMap = []htmlEncoding{
|
||||
windows1252, // und_Latn
|
||||
windows1256, // ar
|
||||
windows1251, // ba
|
||||
windows1251, // be
|
||||
windows1251, // bg
|
||||
windows1250, // cs
|
||||
iso8859_7, // el
|
||||
windows1257, // et
|
||||
windows1256, // fa
|
||||
windows1255, // he
|
||||
windows1250, // hr
|
||||
iso8859_2, // hu
|
||||
shiftJIS, // ja
|
||||
windows1251, // kk
|
||||
euckr, // ko
|
||||
windows1254, // ku
|
||||
windows1251, // ky
|
||||
windows1257, // lt
|
||||
windows1257, // lv
|
||||
windows1251, // mk
|
||||
iso8859_2, // pl
|
||||
windows1251, // ru
|
||||
windows1251, // sah
|
||||
windows1250, // sk
|
||||
iso8859_2, // sl
|
||||
windows1251, // sr
|
||||
windows1251, // tg
|
||||
windows874, // th
|
||||
windows1254, // tr
|
||||
windows1251, // tt
|
||||
windows1251, // uk
|
||||
windows1258, // vi
|
||||
gb18030, // zh-hans
|
||||
big5, // zh-hant
|
||||
}
|
||||
|
||||
const locales = "und_Latn ar ba be bg cs el et fa he hr hu ja kk ko ku ky lt lv mk pl ru sah sk sl sr tg th tr tt uk vi zh-hans zh-hant"
|
192
vendor/golang.org/x/text/encoding/ianaindex/gen.go
generated
vendored
192
vendor/golang.org/x/text/encoding/ianaindex/gen.go
generated
vendored
@ -1,192 +0,0 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/internal/gen"
|
||||
)
|
||||
|
||||
type registry struct {
|
||||
XMLName xml.Name `xml:"registry"`
|
||||
Updated string `xml:"updated"`
|
||||
Registry []struct {
|
||||
ID string `xml:"id,attr"`
|
||||
Record []struct {
|
||||
Name string `xml:"name"`
|
||||
Xref []struct {
|
||||
Type string `xml:"type,attr"`
|
||||
Data string `xml:"data,attr"`
|
||||
} `xml:"xref"`
|
||||
Desc struct {
|
||||
Data string `xml:",innerxml"`
|
||||
} `xml:"description,"`
|
||||
MIB string `xml:"value"`
|
||||
Alias []string `xml:"alias"`
|
||||
MIME string `xml:"preferred_alias"`
|
||||
} `xml:"record"`
|
||||
} `xml:"registry"`
|
||||
}
|
||||
|
||||
func main() {
|
||||
r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml")
|
||||
reg := ®istry{}
|
||||
if err := xml.NewDecoder(r).Decode(®); err != nil && err != io.EOF {
|
||||
log.Fatalf("Error decoding charset registry: %v", err)
|
||||
}
|
||||
if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" {
|
||||
log.Fatalf("Unexpected ID %s", reg.Registry[0].ID)
|
||||
}
|
||||
|
||||
x := &indexInfo{}
|
||||
|
||||
for _, rec := range reg.Registry[0].Record {
|
||||
mib := identifier.MIB(parseInt(rec.MIB))
|
||||
x.addEntry(mib, rec.Name)
|
||||
for _, a := range rec.Alias {
|
||||
a = strings.Split(a, " ")[0] // strip comments.
|
||||
x.addAlias(a, mib)
|
||||
// MIB name aliases are prefixed with a "cs" (character set) in the
|
||||
// registry to identify them as display names and to ensure that
|
||||
// the name starts with a lowercase letter in case it is used as
|
||||
// an identifier. We remove it to be left with a nice clean name.
|
||||
if strings.HasPrefix(a, "cs") {
|
||||
x.setName(2, a[2:])
|
||||
}
|
||||
}
|
||||
if rec.MIME != "" {
|
||||
x.addAlias(rec.MIME, mib)
|
||||
x.setName(1, rec.MIME)
|
||||
}
|
||||
}
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
|
||||
fmt.Fprintln(w, `import "golang.org/x/text/encoding/internal/identifier"`)
|
||||
|
||||
writeIndex(w, x)
|
||||
|
||||
w.WriteGoFile("tables.go", "ianaindex")
|
||||
}
|
||||
|
||||
type alias struct {
|
||||
name string
|
||||
mib identifier.MIB
|
||||
}
|
||||
|
||||
type indexInfo struct {
|
||||
// compacted index from code to MIB
|
||||
codeToMIB []identifier.MIB
|
||||
alias []alias
|
||||
names [][3]string
|
||||
}
|
||||
|
||||
func (ii *indexInfo) Len() int {
|
||||
return len(ii.codeToMIB)
|
||||
}
|
||||
|
||||
func (ii *indexInfo) Less(a, b int) bool {
|
||||
return ii.codeToMIB[a] < ii.codeToMIB[b]
|
||||
}
|
||||
|
||||
func (ii *indexInfo) Swap(a, b int) {
|
||||
ii.codeToMIB[a], ii.codeToMIB[b] = ii.codeToMIB[b], ii.codeToMIB[a]
|
||||
// Co-sort the names.
|
||||
ii.names[a], ii.names[b] = ii.names[b], ii.names[a]
|
||||
}
|
||||
|
||||
func (ii *indexInfo) setName(i int, name string) {
|
||||
ii.names[len(ii.names)-1][i] = name
|
||||
}
|
||||
|
||||
func (ii *indexInfo) addEntry(mib identifier.MIB, name string) {
|
||||
ii.names = append(ii.names, [3]string{name, name, name})
|
||||
ii.addAlias(name, mib)
|
||||
ii.codeToMIB = append(ii.codeToMIB, mib)
|
||||
}
|
||||
|
||||
func (ii *indexInfo) addAlias(name string, mib identifier.MIB) {
|
||||
// Don't add duplicates for the same mib. Adding duplicate aliases for
|
||||
// different MIBs will cause the compiler to barf on an invalid map: great!.
|
||||
for i := len(ii.alias) - 1; i >= 0 && ii.alias[i].mib == mib; i-- {
|
||||
if ii.alias[i].name == name {
|
||||
return
|
||||
}
|
||||
}
|
||||
ii.alias = append(ii.alias, alias{name, mib})
|
||||
lower := strings.ToLower(name)
|
||||
if lower != name {
|
||||
ii.addAlias(lower, mib)
|
||||
}
|
||||
}
|
||||
|
||||
const maxMIMENameLen = '0' - 1 // officially 40, but we leave some buffer.
|
||||
|
||||
func writeIndex(w *gen.CodeWriter, x *indexInfo) {
|
||||
sort.Stable(x)
|
||||
|
||||
// Write constants.
|
||||
fmt.Fprintln(w, "const (")
|
||||
for i, m := range x.codeToMIB {
|
||||
if i == 0 {
|
||||
fmt.Fprintf(w, "enc%d = iota\n", m)
|
||||
} else {
|
||||
fmt.Fprintf(w, "enc%d\n", m)
|
||||
}
|
||||
}
|
||||
fmt.Fprintln(w, "numIANA")
|
||||
fmt.Fprintln(w, ")")
|
||||
|
||||
w.WriteVar("ianaToMIB", x.codeToMIB)
|
||||
|
||||
var ianaNames, mibNames []string
|
||||
for _, names := range x.names {
|
||||
n := names[0]
|
||||
if names[0] != names[1] {
|
||||
// MIME names are mostly identical to IANA names. We share the
|
||||
// tables by setting the first byte of the string to an index into
|
||||
// the string itself (< maxMIMENameLen) to the IANA name. The MIME
|
||||
// name immediately follows the index.
|
||||
x := len(names[1]) + 1
|
||||
if x > maxMIMENameLen {
|
||||
log.Fatalf("MIME name length (%d) > %d", x, maxMIMENameLen)
|
||||
}
|
||||
n = string(x) + names[1] + names[0]
|
||||
}
|
||||
ianaNames = append(ianaNames, n)
|
||||
mibNames = append(mibNames, names[2])
|
||||
}
|
||||
|
||||
w.WriteVar("ianaNames", ianaNames)
|
||||
w.WriteVar("mibNames", mibNames)
|
||||
|
||||
w.WriteComment(`
|
||||
TODO: Instead of using a map, we could use binary search strings doing
|
||||
on-the fly lower-casing per character. This allows to always avoid
|
||||
allocation and will be considerably more compact.`)
|
||||
fmt.Fprintln(w, "var ianaAliases = map[string]int{")
|
||||
for _, a := range x.alias {
|
||||
fmt.Fprintf(w, "%q: enc%d,\n", a.name, a.mib)
|
||||
}
|
||||
fmt.Fprintln(w, "}")
|
||||
}
|
||||
|
||||
func parseInt(s string) int {
|
||||
x, err := strconv.ParseInt(s, 10, 64)
|
||||
if err != nil {
|
||||
log.Fatalf("Could not parse integer: %v", err)
|
||||
}
|
||||
return int(x)
|
||||
}
|
209
vendor/golang.org/x/text/encoding/ianaindex/ianaindex.go
generated
vendored
209
vendor/golang.org/x/text/encoding/ianaindex/ianaindex.go
generated
vendored
@ -1,209 +0,0 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go
|
||||
|
||||
// Package ianaindex maps names to Encodings as specified by the IANA registry.
|
||||
// This includes both the MIME and IANA names.
|
||||
//
|
||||
// See http://www.iana.org/assignments/character-sets/character-sets.xhtml for
|
||||
// more details.
|
||||
package ianaindex
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/charmap"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/encoding/japanese"
|
||||
"golang.org/x/text/encoding/korean"
|
||||
"golang.org/x/text/encoding/simplifiedchinese"
|
||||
"golang.org/x/text/encoding/traditionalchinese"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
)
|
||||
|
||||
// TODO: remove the "Status... incomplete" in the package doc comment.
|
||||
// TODO: allow users to specify their own aliases?
|
||||
// TODO: allow users to specify their own indexes?
|
||||
// TODO: allow canonicalizing names
|
||||
|
||||
// NOTE: only use these top-level variables if we can get the linker to drop
|
||||
// the indexes when they are not used. Make them a function or perhaps only
|
||||
// support MIME otherwise.
|
||||
|
||||
var (
|
||||
// MIME is an index to map MIME names.
|
||||
MIME *Index = mime
|
||||
|
||||
// IANA is an index that supports all names and aliases using IANA names as
|
||||
// the canonical identifier.
|
||||
IANA *Index = iana
|
||||
|
||||
// MIB is an index that associates the MIB display name with an Encoding.
|
||||
MIB *Index = mib
|
||||
|
||||
mime = &Index{mimeName, ianaToMIB, ianaAliases, encodings[:]}
|
||||
iana = &Index{ianaName, ianaToMIB, ianaAliases, encodings[:]}
|
||||
mib = &Index{mibName, ianaToMIB, ianaAliases, encodings[:]}
|
||||
)
|
||||
|
||||
// Index maps names registered by IANA to Encodings.
|
||||
// Currently different Indexes only differ in the names they return for
|
||||
// encodings. In the future they may also differ in supported aliases.
|
||||
type Index struct {
|
||||
names func(i int) string
|
||||
toMIB []identifier.MIB // Sorted slice of supported MIBs
|
||||
alias map[string]int
|
||||
enc []encoding.Encoding
|
||||
}
|
||||
|
||||
var (
|
||||
errInvalidName = errors.New("ianaindex: invalid encoding name")
|
||||
errUnknown = errors.New("ianaindex: unknown Encoding")
|
||||
errUnsupported = errors.New("ianaindex: unsupported Encoding")
|
||||
)
|
||||
|
||||
// Encoding returns an Encoding for IANA-registered names. Matching is
|
||||
// case-insensitive.
|
||||
func (x *Index) Encoding(name string) (encoding.Encoding, error) {
|
||||
name = strings.TrimSpace(name)
|
||||
// First try without lowercasing (possibly creating an allocation).
|
||||
i, ok := x.alias[name]
|
||||
if !ok {
|
||||
i, ok = x.alias[strings.ToLower(name)]
|
||||
if !ok {
|
||||
return nil, errInvalidName
|
||||
}
|
||||
}
|
||||
return x.enc[i], nil
|
||||
}
|
||||
|
||||
// Name reports the canonical name of the given Encoding. It will return an
|
||||
// error if the e is not associated with a known encoding scheme.
|
||||
func (x *Index) Name(e encoding.Encoding) (string, error) {
|
||||
id, ok := e.(identifier.Interface)
|
||||
if !ok {
|
||||
return "", errUnknown
|
||||
}
|
||||
mib, _ := id.ID()
|
||||
if mib == 0 {
|
||||
return "", errUnknown
|
||||
}
|
||||
v := findMIB(x.toMIB, mib)
|
||||
if v == -1 {
|
||||
return "", errUnsupported
|
||||
}
|
||||
return x.names(v), nil
|
||||
}
|
||||
|
||||
// TODO: the coverage of this index is rather spotty. Allowing users to set
|
||||
// encodings would allow:
|
||||
// - users to increase coverage
|
||||
// - allow a partially loaded set of encodings in case the user doesn't need to
|
||||
// them all.
|
||||
// - write an OS-specific wrapper for supported encodings and set them.
|
||||
// The exact definition of Set depends a bit on if and how we want to let users
|
||||
// write their own Encoding implementations. Also, it is not possible yet to
|
||||
// only partially load the encodings without doing some refactoring. Until this
|
||||
// is solved, we might as well not support Set.
|
||||
// // Set sets the e to be used for the encoding scheme identified by name. Only
|
||||
// // canonical names may be used. An empty name assigns e to its internally
|
||||
// // associated encoding scheme.
|
||||
// func (x *Index) Set(name string, e encoding.Encoding) error {
|
||||
// panic("TODO: implement")
|
||||
// }
|
||||
|
||||
func findMIB(x []identifier.MIB, mib identifier.MIB) int {
|
||||
i := sort.Search(len(x), func(i int) bool { return x[i] >= mib })
|
||||
if i < len(x) && x[i] == mib {
|
||||
return i
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
const maxMIMENameLen = '0' - 1 // officially 40, but we leave some buffer.
|
||||
|
||||
func mimeName(x int) string {
|
||||
n := ianaNames[x]
|
||||
// See gen.go for a description of the encoding.
|
||||
if n[0] <= maxMIMENameLen {
|
||||
return n[1:n[0]]
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func ianaName(x int) string {
|
||||
n := ianaNames[x]
|
||||
// See gen.go for a description of the encoding.
|
||||
if n[0] <= maxMIMENameLen {
|
||||
return n[n[0]:]
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func mibName(x int) string {
|
||||
return mibNames[x]
|
||||
}
|
||||
|
||||
var encodings = [numIANA]encoding.Encoding{
|
||||
enc106: unicode.UTF8,
|
||||
enc1015: unicode.UTF16(unicode.BigEndian, unicode.UseBOM),
|
||||
enc1013: unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
|
||||
enc1014: unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM),
|
||||
enc2028: charmap.CodePage037,
|
||||
enc2011: charmap.CodePage437,
|
||||
enc2009: charmap.CodePage850,
|
||||
enc2010: charmap.CodePage852,
|
||||
enc2046: charmap.CodePage855,
|
||||
enc2089: charmap.CodePage858,
|
||||
enc2048: charmap.CodePage860,
|
||||
enc2013: charmap.CodePage862,
|
||||
enc2050: charmap.CodePage863,
|
||||
enc2052: charmap.CodePage865,
|
||||
enc2086: charmap.CodePage866,
|
||||
enc2102: charmap.CodePage1047,
|
||||
enc2091: charmap.CodePage1140,
|
||||
enc4: charmap.ISO8859_1,
|
||||
enc5: charmap.ISO8859_2,
|
||||
enc6: charmap.ISO8859_3,
|
||||
enc7: charmap.ISO8859_4,
|
||||
enc8: charmap.ISO8859_5,
|
||||
enc9: charmap.ISO8859_6,
|
||||
enc81: charmap.ISO8859_6E,
|
||||
enc82: charmap.ISO8859_6I,
|
||||
enc10: charmap.ISO8859_7,
|
||||
enc11: charmap.ISO8859_8,
|
||||
enc84: charmap.ISO8859_8E,
|
||||
enc85: charmap.ISO8859_8I,
|
||||
enc12: charmap.ISO8859_9,
|
||||
enc13: charmap.ISO8859_10,
|
||||
enc109: charmap.ISO8859_13,
|
||||
enc110: charmap.ISO8859_14,
|
||||
enc111: charmap.ISO8859_15,
|
||||
enc112: charmap.ISO8859_16,
|
||||
enc2084: charmap.KOI8R,
|
||||
enc2088: charmap.KOI8U,
|
||||
enc2027: charmap.Macintosh,
|
||||
enc2109: charmap.Windows874,
|
||||
enc2250: charmap.Windows1250,
|
||||
enc2251: charmap.Windows1251,
|
||||
enc2252: charmap.Windows1252,
|
||||
enc2253: charmap.Windows1253,
|
||||
enc2254: charmap.Windows1254,
|
||||
enc2255: charmap.Windows1255,
|
||||
enc2256: charmap.Windows1256,
|
||||
enc2257: charmap.Windows1257,
|
||||
enc2258: charmap.Windows1258,
|
||||
enc18: japanese.EUCJP,
|
||||
enc39: japanese.ISO2022JP,
|
||||
enc17: japanese.ShiftJIS,
|
||||
enc38: korean.EUCKR,
|
||||
enc114: simplifiedchinese.GB18030,
|
||||
enc113: simplifiedchinese.GBK,
|
||||
enc2085: simplifiedchinese.HZGB2312,
|
||||
enc2026: traditionalchinese.Big5,
|
||||
}
|
2348
vendor/golang.org/x/text/encoding/ianaindex/tables.go
generated
vendored
2348
vendor/golang.org/x/text/encoding/ianaindex/tables.go
generated
vendored
File diff suppressed because it is too large
Load Diff
180
vendor/golang.org/x/text/encoding/internal/enctest/enctest.go
generated
vendored
180
vendor/golang.org/x/text/encoding/internal/enctest/enctest.go
generated
vendored
@ -1,180 +0,0 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package enctest
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// Encoder or Decoder
|
||||
type Transcoder interface {
|
||||
transform.Transformer
|
||||
Bytes([]byte) ([]byte, error)
|
||||
String(string) (string, error)
|
||||
}
|
||||
|
||||
func TestEncoding(t *testing.T, e encoding.Encoding, encoded, utf8, prefix, suffix string) {
|
||||
for _, direction := range []string{"Decode", "Encode"} {
|
||||
t.Run(fmt.Sprintf("%v/%s", e, direction), func(t *testing.T) {
|
||||
|
||||
var coder Transcoder
|
||||
var want, src, wPrefix, sPrefix, wSuffix, sSuffix string
|
||||
if direction == "Decode" {
|
||||
coder, want, src = e.NewDecoder(), utf8, encoded
|
||||
wPrefix, sPrefix, wSuffix, sSuffix = "", prefix, "", suffix
|
||||
} else {
|
||||
coder, want, src = e.NewEncoder(), encoded, utf8
|
||||
wPrefix, sPrefix, wSuffix, sSuffix = prefix, "", suffix, ""
|
||||
}
|
||||
|
||||
dst := make([]byte, len(wPrefix)+len(want)+len(wSuffix))
|
||||
nDst, nSrc, err := coder.Transform(dst, []byte(sPrefix+src+sSuffix), true)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if nDst != len(wPrefix)+len(want)+len(wSuffix) {
|
||||
t.Fatalf("nDst got %d, want %d",
|
||||
nDst, len(wPrefix)+len(want)+len(wSuffix))
|
||||
}
|
||||
if nSrc != len(sPrefix)+len(src)+len(sSuffix) {
|
||||
t.Fatalf("nSrc got %d, want %d",
|
||||
nSrc, len(sPrefix)+len(src)+len(sSuffix))
|
||||
}
|
||||
if got := string(dst); got != wPrefix+want+wSuffix {
|
||||
t.Fatalf("\ngot %q\nwant %q", got, wPrefix+want+wSuffix)
|
||||
}
|
||||
|
||||
for _, n := range []int{0, 1, 2, 10, 123, 4567} {
|
||||
input := sPrefix + strings.Repeat(src, n) + sSuffix
|
||||
g, err := coder.String(input)
|
||||
if err != nil {
|
||||
t.Fatalf("Bytes: n=%d: %v", n, err)
|
||||
}
|
||||
if len(g) == 0 && len(input) == 0 {
|
||||
// If the input is empty then the output can be empty,
|
||||
// regardless of whatever wPrefix is.
|
||||
continue
|
||||
}
|
||||
got1, want1 := string(g), wPrefix+strings.Repeat(want, n)+wSuffix
|
||||
if got1 != want1 {
|
||||
t.Fatalf("ReadAll: n=%d\ngot %q\nwant %q",
|
||||
n, trim(got1), trim(want1))
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFile(t *testing.T, e encoding.Encoding) {
|
||||
for _, dir := range []string{"Decode", "Encode"} {
|
||||
t.Run(fmt.Sprintf("%s/%s", e, dir), func(t *testing.T) {
|
||||
dst, src, transformer, err := load(dir, e)
|
||||
if err != nil {
|
||||
t.Fatalf("load: %v", err)
|
||||
}
|
||||
buf, err := transformer.Bytes(src)
|
||||
if err != nil {
|
||||
t.Fatalf("transform: %v", err)
|
||||
}
|
||||
if !bytes.Equal(buf, dst) {
|
||||
t.Error("transformed bytes did not match golden file")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Benchmark(b *testing.B, enc encoding.Encoding) {
|
||||
for _, direction := range []string{"Decode", "Encode"} {
|
||||
b.Run(fmt.Sprintf("%s/%s", enc, direction), func(b *testing.B) {
|
||||
_, src, transformer, err := load(direction, enc)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
b.SetBytes(int64(len(src)))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
r := transform.NewReader(bytes.NewReader(src), transformer)
|
||||
io.Copy(ioutil.Discard, r)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// testdataFiles are files in testdata/*.txt.
|
||||
var testdataFiles = []struct {
|
||||
mib identifier.MIB
|
||||
basename, ext string
|
||||
}{
|
||||
{identifier.Windows1252, "candide", "windows-1252"},
|
||||
{identifier.EUCPkdFmtJapanese, "rashomon", "euc-jp"},
|
||||
{identifier.ISO2022JP, "rashomon", "iso-2022-jp"},
|
||||
{identifier.ShiftJIS, "rashomon", "shift-jis"},
|
||||
{identifier.EUCKR, "unsu-joh-eun-nal", "euc-kr"},
|
||||
{identifier.GBK, "sunzi-bingfa-simplified", "gbk"},
|
||||
{identifier.HZGB2312, "sunzi-bingfa-gb-levels-1-and-2", "hz-gb2312"},
|
||||
{identifier.Big5, "sunzi-bingfa-traditional", "big5"},
|
||||
{identifier.UTF16LE, "candide", "utf-16le"},
|
||||
{identifier.UTF8, "candide", "utf-8"},
|
||||
{identifier.UTF32BE, "candide", "utf-32be"},
|
||||
|
||||
// GB18030 is a superset of GBK and is nominally a Simplified Chinese
|
||||
// encoding, but it can also represent the entire Basic Multilingual
|
||||
// Plane, including codepoints like 'â' that aren't encodable by GBK.
|
||||
// GB18030 on Simplified Chinese should perform similarly to GBK on
|
||||
// Simplified Chinese. GB18030 on "candide" is more interesting.
|
||||
{identifier.GB18030, "candide", "gb18030"},
|
||||
}
|
||||
|
||||
func load(direction string, enc encoding.Encoding) ([]byte, []byte, Transcoder, error) {
|
||||
basename, ext, count := "", "", 0
|
||||
for _, tf := range testdataFiles {
|
||||
if mib, _ := enc.(identifier.Interface).ID(); tf.mib == mib {
|
||||
basename, ext = tf.basename, tf.ext
|
||||
count++
|
||||
}
|
||||
}
|
||||
if count != 1 {
|
||||
if count == 0 {
|
||||
return nil, nil, nil, fmt.Errorf("no testdataFiles for %s", enc)
|
||||
}
|
||||
return nil, nil, nil, fmt.Errorf("too many testdataFiles for %s", enc)
|
||||
}
|
||||
dstFile := fmt.Sprintf("../testdata/%s-%s.txt", basename, ext)
|
||||
srcFile := fmt.Sprintf("../testdata/%s-utf-8.txt", basename)
|
||||
var coder Transcoder = encoding.ReplaceUnsupported(enc.NewEncoder())
|
||||
if direction == "Decode" {
|
||||
dstFile, srcFile = srcFile, dstFile
|
||||
coder = enc.NewDecoder()
|
||||
}
|
||||
dst, err := ioutil.ReadFile(dstFile)
|
||||
if err != nil {
|
||||
if dst, err = ioutil.ReadFile("../" + dstFile); err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
}
|
||||
src, err := ioutil.ReadFile(srcFile)
|
||||
if err != nil {
|
||||
if src, err = ioutil.ReadFile("../" + srcFile); err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
}
|
||||
return dst, src, coder, nil
|
||||
}
|
||||
|
||||
func trim(s string) string {
|
||||
if len(s) < 120 {
|
||||
return s
|
||||
}
|
||||
return s[:50] + "..." + s[len(s)-50:]
|
||||
}
|
82
vendor/golang.org/x/text/encoding/unicode/override.go
generated
vendored
82
vendor/golang.org/x/text/encoding/unicode/override.go
generated
vendored
@ -1,82 +0,0 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package unicode
|
||||
|
||||
import (
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// BOMOverride returns a new decoder transformer that is identical to fallback,
|
||||
// except that the presence of a Byte Order Mark at the start of the input
|
||||
// causes it to switch to the corresponding Unicode decoding. It will only
|
||||
// consider BOMs for UTF-8, UTF-16BE, and UTF-16LE.
|
||||
//
|
||||
// This differs from using ExpectBOM by allowing a BOM to switch to UTF-8, not
|
||||
// just UTF-16 variants, and allowing falling back to any encoding scheme.
|
||||
//
|
||||
// This technique is recommended by the W3C for use in HTML 5: "For
|
||||
// compatibility with deployed content, the byte order mark (also known as BOM)
|
||||
// is considered more authoritative than anything else."
|
||||
// http://www.w3.org/TR/encoding/#specification-hooks
|
||||
//
|
||||
// Using BOMOverride is mostly intended for use cases where the first characters
|
||||
// of a fallback encoding are known to not be a BOM, for example, for valid HTML
|
||||
// and most encodings.
|
||||
func BOMOverride(fallback transform.Transformer) transform.Transformer {
|
||||
// TODO: possibly allow a variadic argument of unicode encodings to allow
|
||||
// specifying details of which fallbacks are supported as well as
|
||||
// specifying the details of the implementations. This would also allow for
|
||||
// support for UTF-32, which should not be supported by default.
|
||||
return &bomOverride{fallback: fallback}
|
||||
}
|
||||
|
||||
type bomOverride struct {
|
||||
fallback transform.Transformer
|
||||
current transform.Transformer
|
||||
}
|
||||
|
||||
func (d *bomOverride) Reset() {
|
||||
d.current = nil
|
||||
d.fallback.Reset()
|
||||
}
|
||||
|
||||
var (
|
||||
// TODO: we could use decode functions here, instead of allocating a new
|
||||
// decoder on every NewDecoder as IgnoreBOM decoders can be stateless.
|
||||
utf16le = UTF16(LittleEndian, IgnoreBOM)
|
||||
utf16be = UTF16(BigEndian, IgnoreBOM)
|
||||
)
|
||||
|
||||
const utf8BOM = "\ufeff"
|
||||
|
||||
func (d *bomOverride) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
if d.current != nil {
|
||||
return d.current.Transform(dst, src, atEOF)
|
||||
}
|
||||
if len(src) < 3 && !atEOF {
|
||||
return 0, 0, transform.ErrShortSrc
|
||||
}
|
||||
d.current = d.fallback
|
||||
bomSize := 0
|
||||
if len(src) >= 2 {
|
||||
if src[0] == 0xFF && src[1] == 0xFE {
|
||||
d.current = utf16le.NewDecoder()
|
||||
bomSize = 2
|
||||
} else if src[0] == 0xFE && src[1] == 0xFF {
|
||||
d.current = utf16be.NewDecoder()
|
||||
bomSize = 2
|
||||
} else if len(src) >= 3 &&
|
||||
src[0] == utf8BOM[0] &&
|
||||
src[1] == utf8BOM[1] &&
|
||||
src[2] == utf8BOM[2] {
|
||||
d.current = transform.Nop
|
||||
bomSize = 3
|
||||
}
|
||||
}
|
||||
if bomSize < len(src) {
|
||||
nDst, nSrc, err = d.current.Transform(dst, src[bomSize:], atEOF)
|
||||
}
|
||||
return nDst, nSrc + bomSize, err
|
||||
}
|
434
vendor/golang.org/x/text/encoding/unicode/unicode.go
generated
vendored
434
vendor/golang.org/x/text/encoding/unicode/unicode.go
generated
vendored
@ -1,434 +0,0 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package unicode provides Unicode encodings such as UTF-16.
|
||||
package unicode // import "golang.org/x/text/encoding/unicode"
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"unicode/utf16"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/internal/utf8internal"
|
||||
"golang.org/x/text/runes"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// TODO: I think the Transformers really should return errors on unmatched
|
||||
// surrogate pairs and odd numbers of bytes. This is not required by RFC 2781,
|
||||
// which leaves it open, but is suggested by WhatWG. It will allow for all error
|
||||
// modes as defined by WhatWG: fatal, HTML and Replacement. This would require
|
||||
// the introduction of some kind of error type for conveying the erroneous code
|
||||
// point.
|
||||
|
||||
// UTF8 is the UTF-8 encoding.
|
||||
var UTF8 encoding.Encoding = utf8enc
|
||||
|
||||
var utf8enc = &internal.Encoding{
|
||||
&internal.SimpleEncoding{utf8Decoder{}, runes.ReplaceIllFormed()},
|
||||
"UTF-8",
|
||||
identifier.UTF8,
|
||||
}
|
||||
|
||||
type utf8Decoder struct{ transform.NopResetter }
|
||||
|
||||
func (utf8Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
var pSrc int // point from which to start copy in src
|
||||
var accept utf8internal.AcceptRange
|
||||
|
||||
// The decoder can only make the input larger, not smaller.
|
||||
n := len(src)
|
||||
if len(dst) < n {
|
||||
err = transform.ErrShortDst
|
||||
n = len(dst)
|
||||
atEOF = false
|
||||
}
|
||||
for nSrc < n {
|
||||
c := src[nSrc]
|
||||
if c < utf8.RuneSelf {
|
||||
nSrc++
|
||||
continue
|
||||
}
|
||||
first := utf8internal.First[c]
|
||||
size := int(first & utf8internal.SizeMask)
|
||||
if first == utf8internal.FirstInvalid {
|
||||
goto handleInvalid // invalid starter byte
|
||||
}
|
||||
accept = utf8internal.AcceptRanges[first>>utf8internal.AcceptShift]
|
||||
if nSrc+size > n {
|
||||
if !atEOF {
|
||||
// We may stop earlier than necessary here if the short sequence
|
||||
// has invalid bytes. Not checking for this simplifies the code
|
||||
// and may avoid duplicate computations in certain conditions.
|
||||
if err == nil {
|
||||
err = transform.ErrShortSrc
|
||||
}
|
||||
break
|
||||
}
|
||||
// Determine the maximal subpart of an ill-formed subsequence.
|
||||
switch {
|
||||
case nSrc+1 >= n || src[nSrc+1] < accept.Lo || accept.Hi < src[nSrc+1]:
|
||||
size = 1
|
||||
case nSrc+2 >= n || src[nSrc+2] < utf8internal.LoCB || utf8internal.HiCB < src[nSrc+2]:
|
||||
size = 2
|
||||
default:
|
||||
size = 3 // As we are short, the maximum is 3.
|
||||
}
|
||||
goto handleInvalid
|
||||
}
|
||||
if c = src[nSrc+1]; c < accept.Lo || accept.Hi < c {
|
||||
size = 1
|
||||
goto handleInvalid // invalid continuation byte
|
||||
} else if size == 2 {
|
||||
} else if c = src[nSrc+2]; c < utf8internal.LoCB || utf8internal.HiCB < c {
|
||||
size = 2
|
||||
goto handleInvalid // invalid continuation byte
|
||||
} else if size == 3 {
|
||||
} else if c = src[nSrc+3]; c < utf8internal.LoCB || utf8internal.HiCB < c {
|
||||
size = 3
|
||||
goto handleInvalid // invalid continuation byte
|
||||
}
|
||||
nSrc += size
|
||||
continue
|
||||
|
||||
handleInvalid:
|
||||
// Copy the scanned input so far.
|
||||
nDst += copy(dst[nDst:], src[pSrc:nSrc])
|
||||
|
||||
// Append RuneError to the destination.
|
||||
const runeError = "\ufffd"
|
||||
if nDst+len(runeError) > len(dst) {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
nDst += copy(dst[nDst:], runeError)
|
||||
|
||||
// Skip the maximal subpart of an ill-formed subsequence according to
|
||||
// the W3C standard way instead of the Go way. This Transform is
|
||||
// probably the only place in the text repo where it is warranted.
|
||||
nSrc += size
|
||||
pSrc = nSrc
|
||||
|
||||
// Recompute the maximum source length.
|
||||
if sz := len(dst) - nDst; sz < len(src)-nSrc {
|
||||
err = transform.ErrShortDst
|
||||
n = nSrc + sz
|
||||
atEOF = false
|
||||
}
|
||||
}
|
||||
return nDst + copy(dst[nDst:], src[pSrc:nSrc]), nSrc, err
|
||||
}
|
||||
|
||||
// UTF16 returns a UTF-16 Encoding for the given default endianness and byte
|
||||
// order mark (BOM) policy.
|
||||
//
|
||||
// When decoding from UTF-16 to UTF-8, if the BOMPolicy is IgnoreBOM then
|
||||
// neither BOMs U+FEFF nor noncharacters U+FFFE in the input stream will affect
|
||||
// the endianness used for decoding, and will instead be output as their
|
||||
// standard UTF-8 encodings: "\xef\xbb\xbf" and "\xef\xbf\xbe". If the BOMPolicy
|
||||
// is UseBOM or ExpectBOM a staring BOM is not written to the UTF-8 output.
|
||||
// Instead, it overrides the default endianness e for the remainder of the
|
||||
// transformation. Any subsequent BOMs U+FEFF or noncharacters U+FFFE will not
|
||||
// affect the endianness used, and will instead be output as their standard
|
||||
// UTF-8 encodings. For UseBOM, if there is no starting BOM, it will proceed
|
||||
// with the default Endianness. For ExpectBOM, in that case, the transformation
|
||||
// will return early with an ErrMissingBOM error.
|
||||
//
|
||||
// When encoding from UTF-8 to UTF-16, a BOM will be inserted at the start of
|
||||
// the output if the BOMPolicy is UseBOM or ExpectBOM. Otherwise, a BOM will not
|
||||
// be inserted. The UTF-8 input does not need to contain a BOM.
|
||||
//
|
||||
// There is no concept of a 'native' endianness. If the UTF-16 data is produced
|
||||
// and consumed in a greater context that implies a certain endianness, use
|
||||
// IgnoreBOM. Otherwise, use ExpectBOM and always produce and consume a BOM.
|
||||
//
|
||||
// In the language of http://www.unicode.org/faq/utf_bom.html#bom10, IgnoreBOM
|
||||
// corresponds to "Where the precise type of the data stream is known... the
|
||||
// BOM should not be used" and ExpectBOM corresponds to "A particular
|
||||
// protocol... may require use of the BOM".
|
||||
func UTF16(e Endianness, b BOMPolicy) encoding.Encoding {
|
||||
return utf16Encoding{config{e, b}, mibValue[e][b&bomMask]}
|
||||
}
|
||||
|
||||
// mibValue maps Endianness and BOMPolicy settings to MIB constants. Note that
|
||||
// some configurations map to the same MIB identifier. RFC 2781 has requirements
|
||||
// and recommendations. Some of the "configurations" are merely recommendations,
|
||||
// so multiple configurations could match.
|
||||
var mibValue = map[Endianness][numBOMValues]identifier.MIB{
|
||||
BigEndian: [numBOMValues]identifier.MIB{
|
||||
IgnoreBOM: identifier.UTF16BE,
|
||||
UseBOM: identifier.UTF16, // BigEnding default is preferred by RFC 2781.
|
||||
// TODO: acceptBOM | strictBOM would map to UTF16BE as well.
|
||||
},
|
||||
LittleEndian: [numBOMValues]identifier.MIB{
|
||||
IgnoreBOM: identifier.UTF16LE,
|
||||
UseBOM: identifier.UTF16, // LittleEndian default is allowed and preferred on Windows.
|
||||
// TODO: acceptBOM | strictBOM would map to UTF16LE as well.
|
||||
},
|
||||
// ExpectBOM is not widely used and has no valid MIB identifier.
|
||||
}
|
||||
|
||||
// All lists a configuration for each IANA-defined UTF-16 variant.
|
||||
var All = []encoding.Encoding{
|
||||
UTF8,
|
||||
UTF16(BigEndian, UseBOM),
|
||||
UTF16(BigEndian, IgnoreBOM),
|
||||
UTF16(LittleEndian, IgnoreBOM),
|
||||
}
|
||||
|
||||
// BOMPolicy is a UTF-16 encoding's byte order mark policy.
|
||||
type BOMPolicy uint8
|
||||
|
||||
const (
|
||||
writeBOM BOMPolicy = 0x01
|
||||
acceptBOM BOMPolicy = 0x02
|
||||
requireBOM BOMPolicy = 0x04
|
||||
bomMask BOMPolicy = 0x07
|
||||
|
||||
// HACK: numBOMValues == 8 triggers a bug in the 1.4 compiler (cannot have a
|
||||
// map of an array of length 8 of a type that is also used as a key or value
|
||||
// in another map). See golang.org/issue/11354.
|
||||
// TODO: consider changing this value back to 8 if the use of 1.4.* has
|
||||
// been minimized.
|
||||
numBOMValues = 8 + 1
|
||||
|
||||
// IgnoreBOM means to ignore any byte order marks.
|
||||
IgnoreBOM BOMPolicy = 0
|
||||
// Common and RFC 2781-compliant interpretation for UTF-16BE/LE.
|
||||
|
||||
// UseBOM means that the UTF-16 form may start with a byte order mark, which
|
||||
// will be used to override the default encoding.
|
||||
UseBOM BOMPolicy = writeBOM | acceptBOM
|
||||
// Common and RFC 2781-compliant interpretation for UTF-16.
|
||||
|
||||
// ExpectBOM means that the UTF-16 form must start with a byte order mark,
|
||||
// which will be used to override the default encoding.
|
||||
ExpectBOM BOMPolicy = writeBOM | acceptBOM | requireBOM
|
||||
// Used in Java as Unicode (not to be confused with Java's UTF-16) and
|
||||
// ICU's UTF-16,version=1. Not compliant with RFC 2781.
|
||||
|
||||
// TODO (maybe): strictBOM: BOM must match Endianness. This would allow:
|
||||
// - UTF-16(B|L)E,version=1: writeBOM | acceptBOM | requireBOM | strictBOM
|
||||
// (UnicodeBig and UnicodeLittle in Java)
|
||||
// - RFC 2781-compliant, but less common interpretation for UTF-16(B|L)E:
|
||||
// acceptBOM | strictBOM (e.g. assigned to CheckBOM).
|
||||
// This addition would be consistent with supporting ExpectBOM.
|
||||
)
|
||||
|
||||
// Endianness is a UTF-16 encoding's default endianness.
|
||||
type Endianness bool
|
||||
|
||||
const (
|
||||
// BigEndian is UTF-16BE.
|
||||
BigEndian Endianness = false
|
||||
// LittleEndian is UTF-16LE.
|
||||
LittleEndian Endianness = true
|
||||
)
|
||||
|
||||
// ErrMissingBOM means that decoding UTF-16 input with ExpectBOM did not find a
|
||||
// starting byte order mark.
|
||||
var ErrMissingBOM = errors.New("encoding: missing byte order mark")
|
||||
|
||||
type utf16Encoding struct {
|
||||
config
|
||||
mib identifier.MIB
|
||||
}
|
||||
|
||||
type config struct {
|
||||
endianness Endianness
|
||||
bomPolicy BOMPolicy
|
||||
}
|
||||
|
||||
func (u utf16Encoding) NewDecoder() *encoding.Decoder {
|
||||
return &encoding.Decoder{Transformer: &utf16Decoder{
|
||||
initial: u.config,
|
||||
current: u.config,
|
||||
}}
|
||||
}
|
||||
|
||||
func (u utf16Encoding) NewEncoder() *encoding.Encoder {
|
||||
return &encoding.Encoder{Transformer: &utf16Encoder{
|
||||
endianness: u.endianness,
|
||||
initialBOMPolicy: u.bomPolicy,
|
||||
currentBOMPolicy: u.bomPolicy,
|
||||
}}
|
||||
}
|
||||
|
||||
func (u utf16Encoding) ID() (mib identifier.MIB, other string) {
|
||||
return u.mib, ""
|
||||
}
|
||||
|
||||
func (u utf16Encoding) String() string {
|
||||
e, b := "B", ""
|
||||
if u.endianness == LittleEndian {
|
||||
e = "L"
|
||||
}
|
||||
switch u.bomPolicy {
|
||||
case ExpectBOM:
|
||||
b = "Expect"
|
||||
case UseBOM:
|
||||
b = "Use"
|
||||
case IgnoreBOM:
|
||||
b = "Ignore"
|
||||
}
|
||||
return "UTF-16" + e + "E (" + b + " BOM)"
|
||||
}
|
||||
|
||||
type utf16Decoder struct {
|
||||
initial config
|
||||
current config
|
||||
}
|
||||
|
||||
func (u *utf16Decoder) Reset() {
|
||||
u.current = u.initial
|
||||
}
|
||||
|
||||
func (u *utf16Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
if len(src) == 0 {
|
||||
if atEOF && u.current.bomPolicy&requireBOM != 0 {
|
||||
return 0, 0, ErrMissingBOM
|
||||
}
|
||||
return 0, 0, nil
|
||||
}
|
||||
if u.current.bomPolicy&acceptBOM != 0 {
|
||||
if len(src) < 2 {
|
||||
return 0, 0, transform.ErrShortSrc
|
||||
}
|
||||
switch {
|
||||
case src[0] == 0xfe && src[1] == 0xff:
|
||||
u.current.endianness = BigEndian
|
||||
nSrc = 2
|
||||
case src[0] == 0xff && src[1] == 0xfe:
|
||||
u.current.endianness = LittleEndian
|
||||
nSrc = 2
|
||||
default:
|
||||
if u.current.bomPolicy&requireBOM != 0 {
|
||||
return 0, 0, ErrMissingBOM
|
||||
}
|
||||
}
|
||||
u.current.bomPolicy = IgnoreBOM
|
||||
}
|
||||
|
||||
var r rune
|
||||
var dSize, sSize int
|
||||
for nSrc < len(src) {
|
||||
if nSrc+1 < len(src) {
|
||||
x := uint16(src[nSrc+0])<<8 | uint16(src[nSrc+1])
|
||||
if u.current.endianness == LittleEndian {
|
||||
x = x>>8 | x<<8
|
||||
}
|
||||
r, sSize = rune(x), 2
|
||||
if utf16.IsSurrogate(r) {
|
||||
if nSrc+3 < len(src) {
|
||||
x = uint16(src[nSrc+2])<<8 | uint16(src[nSrc+3])
|
||||
if u.current.endianness == LittleEndian {
|
||||
x = x>>8 | x<<8
|
||||
}
|
||||
// Save for next iteration if it is not a high surrogate.
|
||||
if isHighSurrogate(rune(x)) {
|
||||
r, sSize = utf16.DecodeRune(r, rune(x)), 4
|
||||
}
|
||||
} else if !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
if dSize = utf8.RuneLen(r); dSize < 0 {
|
||||
r, dSize = utf8.RuneError, 3
|
||||
}
|
||||
} else if atEOF {
|
||||
// Single trailing byte.
|
||||
r, dSize, sSize = utf8.RuneError, 3, 1
|
||||
} else {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
if nDst+dSize > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
nDst += utf8.EncodeRune(dst[nDst:], r)
|
||||
nSrc += sSize
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
func isHighSurrogate(r rune) bool {
|
||||
return 0xDC00 <= r && r <= 0xDFFF
|
||||
}
|
||||
|
||||
type utf16Encoder struct {
|
||||
endianness Endianness
|
||||
initialBOMPolicy BOMPolicy
|
||||
currentBOMPolicy BOMPolicy
|
||||
}
|
||||
|
||||
func (u *utf16Encoder) Reset() {
|
||||
u.currentBOMPolicy = u.initialBOMPolicy
|
||||
}
|
||||
|
||||
func (u *utf16Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
if u.currentBOMPolicy&writeBOM != 0 {
|
||||
if len(dst) < 2 {
|
||||
return 0, 0, transform.ErrShortDst
|
||||
}
|
||||
dst[0], dst[1] = 0xfe, 0xff
|
||||
u.currentBOMPolicy = IgnoreBOM
|
||||
nDst = 2
|
||||
}
|
||||
|
||||
r, size := rune(0), 0
|
||||
for nSrc < len(src) {
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if r <= 0xffff {
|
||||
if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = uint8(r >> 8)
|
||||
dst[nDst+1] = uint8(r)
|
||||
nDst += 2
|
||||
} else {
|
||||
if nDst+4 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
r1, r2 := utf16.EncodeRune(r)
|
||||
dst[nDst+0] = uint8(r1 >> 8)
|
||||
dst[nDst+1] = uint8(r1)
|
||||
dst[nDst+2] = uint8(r2 >> 8)
|
||||
dst[nDst+3] = uint8(r2)
|
||||
nDst += 4
|
||||
}
|
||||
nSrc += size
|
||||
}
|
||||
|
||||
if u.endianness == LittleEndian {
|
||||
for i := 0; i < nDst; i += 2 {
|
||||
dst[i], dst[i+1] = dst[i+1], dst[i]
|
||||
}
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
296
vendor/golang.org/x/text/encoding/unicode/utf32/utf32.go
generated
vendored
296
vendor/golang.org/x/text/encoding/unicode/utf32/utf32.go
generated
vendored
@ -1,296 +0,0 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package utf32 provides the UTF-32 Unicode encoding.
|
||||
//
|
||||
// Please note that support for UTF-32 is discouraged as it is a rare and
|
||||
// inefficient encoding, unfit for use as an interchange format. For use
|
||||
// on the web, the W3C strongly discourages its use
|
||||
// (https://www.w3.org/TR/html5/document-metadata.html#charset)
|
||||
// while WHATWG directly prohibits supporting it
|
||||
// (https://html.spec.whatwg.org/multipage/syntax.html#character-encodings).
|
||||
package utf32 // import "golang.org/x/text/encoding/unicode/utf32"
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// All lists a configuration for each IANA-defined UTF-32 variant.
|
||||
var All = []encoding.Encoding{
|
||||
UTF32(BigEndian, UseBOM),
|
||||
UTF32(BigEndian, IgnoreBOM),
|
||||
UTF32(LittleEndian, IgnoreBOM),
|
||||
}
|
||||
|
||||
// ErrMissingBOM means that decoding UTF-32 input with ExpectBOM did not
|
||||
// find a starting byte order mark.
|
||||
var ErrMissingBOM = errors.New("encoding: missing byte order mark")
|
||||
|
||||
// UTF32 returns a UTF-32 Encoding for the given default endianness and
|
||||
// byte order mark (BOM) policy.
|
||||
//
|
||||
// When decoding from UTF-32 to UTF-8, if the BOMPolicy is IgnoreBOM then
|
||||
// neither BOMs U+FEFF nor ill-formed code units 0xFFFE0000 in the input
|
||||
// stream will affect the endianness used for decoding. Instead BOMs will
|
||||
// be output as their standard UTF-8 encoding "\xef\xbb\xbf" while
|
||||
// 0xFFFE0000 code units will be output as "\xef\xbf\xbd", the standard
|
||||
// UTF-8 encoding for the Unicode replacement character. If the BOMPolicy
|
||||
// is UseBOM or ExpectBOM a starting BOM is not written to the UTF-8
|
||||
// output. Instead, it overrides the default endianness e for the remainder
|
||||
// of the transformation. Any subsequent BOMs U+FEFF or ill-formed code
|
||||
// units 0xFFFE0000 will not affect the endianness used, and will instead
|
||||
// be output as their standard UTF-8 (replacement) encodings. For UseBOM,
|
||||
// if there is no starting BOM, it will proceed with the default
|
||||
// Endianness. For ExpectBOM, in that case, the transformation will return
|
||||
// early with an ErrMissingBOM error.
|
||||
//
|
||||
// When encoding from UTF-8 to UTF-32, a BOM will be inserted at the start
|
||||
// of the output if the BOMPolicy is UseBOM or ExpectBOM. Otherwise, a BOM
|
||||
// will not be inserted. The UTF-8 input does not need to contain a BOM.
|
||||
//
|
||||
// There is no concept of a 'native' endianness. If the UTF-32 data is
|
||||
// produced and consumed in a greater context that implies a certain
|
||||
// endianness, use IgnoreBOM. Otherwise, use ExpectBOM and always produce
|
||||
// and consume a BOM.
|
||||
//
|
||||
// In the language of http://www.unicode.org/faq/utf_bom.html#bom10,
|
||||
// IgnoreBOM corresponds to "Where the precise type of the data stream is
|
||||
// known... the BOM should not be used" and ExpectBOM corresponds to "A
|
||||
// particular protocol... may require use of the BOM".
|
||||
func UTF32(e Endianness, b BOMPolicy) encoding.Encoding {
|
||||
return utf32Encoding{config{e, b}, mibValue[e][b&bomMask]}
|
||||
}
|
||||
|
||||
// mibValue maps Endianness and BOMPolicy settings to MIB constants for UTF-32.
|
||||
// Note that some configurations map to the same MIB identifier.
|
||||
var mibValue = map[Endianness][numBOMValues]identifier.MIB{
|
||||
BigEndian: [numBOMValues]identifier.MIB{
|
||||
IgnoreBOM: identifier.UTF32BE,
|
||||
UseBOM: identifier.UTF32,
|
||||
},
|
||||
LittleEndian: [numBOMValues]identifier.MIB{
|
||||
IgnoreBOM: identifier.UTF32LE,
|
||||
UseBOM: identifier.UTF32,
|
||||
},
|
||||
// ExpectBOM is not widely used and has no valid MIB identifier.
|
||||
}
|
||||
|
||||
// BOMPolicy is a UTF-32 encodings's byte order mark policy.
|
||||
type BOMPolicy uint8
|
||||
|
||||
const (
|
||||
writeBOM BOMPolicy = 0x01
|
||||
acceptBOM BOMPolicy = 0x02
|
||||
requireBOM BOMPolicy = 0x04
|
||||
bomMask BOMPolicy = 0x07
|
||||
|
||||
// HACK: numBOMValues == 8 triggers a bug in the 1.4 compiler (cannot have a
|
||||
// map of an array of length 8 of a type that is also used as a key or value
|
||||
// in another map). See golang.org/issue/11354.
|
||||
// TODO: consider changing this value back to 8 if the use of 1.4.* has
|
||||
// been minimized.
|
||||
numBOMValues = 8 + 1
|
||||
|
||||
// IgnoreBOM means to ignore any byte order marks.
|
||||
IgnoreBOM BOMPolicy = 0
|
||||
// Unicode-compliant interpretation for UTF-32BE/LE.
|
||||
|
||||
// UseBOM means that the UTF-32 form may start with a byte order mark,
|
||||
// which will be used to override the default encoding.
|
||||
UseBOM BOMPolicy = writeBOM | acceptBOM
|
||||
// Unicode-compliant interpretation for UTF-32.
|
||||
|
||||
// ExpectBOM means that the UTF-32 form must start with a byte order mark,
|
||||
// which will be used to override the default encoding.
|
||||
ExpectBOM BOMPolicy = writeBOM | acceptBOM | requireBOM
|
||||
// Consistent with BOMPolicy definition in golang.org/x/text/encoding/unicode
|
||||
)
|
||||
|
||||
// Endianness is a UTF-32 encoding's default endianness.
|
||||
type Endianness bool
|
||||
|
||||
const (
|
||||
// BigEndian is UTF-32BE.
|
||||
BigEndian Endianness = false
|
||||
// LittleEndian is UTF-32LE.
|
||||
LittleEndian Endianness = true
|
||||
)
|
||||
|
||||
type config struct {
|
||||
endianness Endianness
|
||||
bomPolicy BOMPolicy
|
||||
}
|
||||
|
||||
type utf32Encoding struct {
|
||||
config
|
||||
mib identifier.MIB
|
||||
}
|
||||
|
||||
func (u utf32Encoding) NewDecoder() *encoding.Decoder {
|
||||
return &encoding.Decoder{Transformer: &utf32Decoder{
|
||||
initial: u.config,
|
||||
current: u.config,
|
||||
}}
|
||||
}
|
||||
|
||||
func (u utf32Encoding) NewEncoder() *encoding.Encoder {
|
||||
return &encoding.Encoder{Transformer: &utf32Encoder{
|
||||
endianness: u.endianness,
|
||||
initialBOMPolicy: u.bomPolicy,
|
||||
currentBOMPolicy: u.bomPolicy,
|
||||
}}
|
||||
}
|
||||
|
||||
func (u utf32Encoding) ID() (mib identifier.MIB, other string) {
|
||||
return u.mib, ""
|
||||
}
|
||||
|
||||
func (u utf32Encoding) String() string {
|
||||
e, b := "B", ""
|
||||
if u.endianness == LittleEndian {
|
||||
e = "L"
|
||||
}
|
||||
switch u.bomPolicy {
|
||||
case ExpectBOM:
|
||||
b = "Expect"
|
||||
case UseBOM:
|
||||
b = "Use"
|
||||
case IgnoreBOM:
|
||||
b = "Ignore"
|
||||
}
|
||||
return "UTF-32" + e + "E (" + b + " BOM)"
|
||||
}
|
||||
|
||||
type utf32Decoder struct {
|
||||
initial config
|
||||
current config
|
||||
}
|
||||
|
||||
func (u *utf32Decoder) Reset() {
|
||||
u.current = u.initial
|
||||
}
|
||||
|
||||
func (u *utf32Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
if len(src) == 0 {
|
||||
if atEOF && u.current.bomPolicy&requireBOM != 0 {
|
||||
return 0, 0, ErrMissingBOM
|
||||
}
|
||||
return 0, 0, nil
|
||||
}
|
||||
if u.current.bomPolicy&acceptBOM != 0 {
|
||||
if len(src) < 4 {
|
||||
return 0, 0, transform.ErrShortSrc
|
||||
}
|
||||
switch {
|
||||
case src[0] == 0x00 && src[1] == 0x00 && src[2] == 0xfe && src[3] == 0xff:
|
||||
u.current.endianness = BigEndian
|
||||
nSrc = 4
|
||||
case src[0] == 0xff && src[1] == 0xfe && src[2] == 0x00 && src[3] == 0x00:
|
||||
u.current.endianness = LittleEndian
|
||||
nSrc = 4
|
||||
default:
|
||||
if u.current.bomPolicy&requireBOM != 0 {
|
||||
return 0, 0, ErrMissingBOM
|
||||
}
|
||||
}
|
||||
u.current.bomPolicy = IgnoreBOM
|
||||
}
|
||||
|
||||
var r rune
|
||||
var dSize, sSize int
|
||||
for nSrc < len(src) {
|
||||
if nSrc+3 < len(src) {
|
||||
x := uint32(src[nSrc+0])<<24 | uint32(src[nSrc+1])<<16 |
|
||||
uint32(src[nSrc+2])<<8 | uint32(src[nSrc+3])
|
||||
if u.current.endianness == LittleEndian {
|
||||
x = x>>24 | (x >> 8 & 0x0000FF00) | (x << 8 & 0x00FF0000) | x<<24
|
||||
}
|
||||
r, sSize = rune(x), 4
|
||||
if dSize = utf8.RuneLen(r); dSize < 0 {
|
||||
r, dSize = utf8.RuneError, 3
|
||||
}
|
||||
} else if atEOF {
|
||||
// 1..3 trailing bytes.
|
||||
r, dSize, sSize = utf8.RuneError, 3, len(src)-nSrc
|
||||
} else {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
if nDst+dSize > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
nDst += utf8.EncodeRune(dst[nDst:], r)
|
||||
nSrc += sSize
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
type utf32Encoder struct {
|
||||
endianness Endianness
|
||||
initialBOMPolicy BOMPolicy
|
||||
currentBOMPolicy BOMPolicy
|
||||
}
|
||||
|
||||
func (u *utf32Encoder) Reset() {
|
||||
u.currentBOMPolicy = u.initialBOMPolicy
|
||||
}
|
||||
|
||||
func (u *utf32Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
if u.currentBOMPolicy&writeBOM != 0 {
|
||||
if len(dst) < 4 {
|
||||
return 0, 0, transform.ErrShortDst
|
||||
}
|
||||
dst[0], dst[1], dst[2], dst[3] = 0x00, 0x00, 0xfe, 0xff
|
||||
u.currentBOMPolicy = IgnoreBOM
|
||||
nDst = 4
|
||||
}
|
||||
|
||||
r, size := rune(0), 0
|
||||
for nSrc < len(src) {
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if nDst+4 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
|
||||
dst[nDst+0] = uint8(r >> 24)
|
||||
dst[nDst+1] = uint8(r >> 16)
|
||||
dst[nDst+2] = uint8(r >> 8)
|
||||
dst[nDst+3] = uint8(r)
|
||||
nDst += 4
|
||||
nSrc += size
|
||||
}
|
||||
|
||||
if u.endianness == LittleEndian {
|
||||
for i := 0; i < nDst; i += 4 {
|
||||
dst[i], dst[i+1], dst[i+2], dst[i+3] = dst[i+3], dst[i+2], dst[i+1], dst[i]
|
||||
}
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
Reference in New Issue
Block a user