mirror of https://github.com/cwinfo/matterbridge.git synced 2025-06-27 15:49:23 +00:00

Add go-charset and chardet to vendor

Wim
2017-07-07 23:34:05 +02:00
parent 2338c69d40
commit a0938d9386
47 changed files with 3974 additions and 0 deletions

vendor/github.com/paulrosania/go-charset/LICENSE (27 lines, generated, vendored, new file)

@ -0,0 +1,27 @@
Copyright (c) 2014, Paul Rosania. All rights reserved.
Portions Copyright (c) 2013, Roger Peppe. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


@ -0,0 +1,65 @@
package charset
import (
"bytes"
"fmt"
"unicode/utf8"
)
func init() {
registerClass("ascii", fromASCII, toASCII)
}
const errorByte = '?'
type translateFromASCII bool
type codePointError struct {
i int
cp rune
charset string
}
func (e *codePointError) Error() string {
return fmt.Sprintf("Parse error at index %d: Code point %d is undefined in %s", e.i, e.cp, e.charset)
}
func (strict translateFromASCII) Translate(data []byte, eof bool) (int, []byte, error) {
buf := bytes.NewBuffer(make([]byte, 0, len(data)))
for i, c := range data {
if c > 0 && c < 128 {
buf.WriteByte(c)
if c < 32 && c != 10 && c != 13 && c != 9 {
// badly formed
}
} else {
if strict {
return 0, nil, &codePointError{i, rune(c), "US-ASCII"}
}
buf.WriteRune(utf8.RuneError)
}
}
return len(data), buf.Bytes(), nil
}
type translateToASCII bool
func (strict translateToASCII) Translate(data []byte, eof bool) (int, []byte, error) {
buf := bytes.NewBuffer(make([]byte, 0, len(data)))
for _, c := range data {
if c > 0 && c < 128 {
buf.WriteByte(c)
} else {
buf.WriteByte(errorByte)
}
}
return len(data), buf.Bytes(), nil
}
func fromASCII(arg string) (Translator, error) {
return new(translateFromASCII), nil
}
func toASCII(arg string) (Translator, error) {
return new(translateToASCII), nil
}


@ -0,0 +1,88 @@
package charset
import (
"fmt"
"unicode/utf8"
)
func init() {
registerClass("big5", fromBig5, nil)
}
// Big5 consists of 89 fonts of 157 chars each
const (
big5Max = 13973
big5Font = 157
big5Data = "big5.dat"
)
type translateFromBig5 struct {
font int
scratch []byte
big5map []rune
}
func (p *translateFromBig5) Translate(data []byte, eof bool) (int, []byte, error) {
p.scratch = p.scratch[:0]
n := 0
for len(data) > 0 {
c := int(data[0])
data = data[1:]
n++
if p.font == -1 {
// idle state
if c >= 0xa1 {
p.font = c
continue
}
if c == 26 {
c = '\n'
}
continue
}
f := p.font
p.font = -1
r := utf8.RuneError
switch {
case c >= 64 && c <= 126:
c -= 64
case c >= 161 && c <= 254:
c = c - 161 + 63
default:
// bad big5 char
f = 255
}
if f <= 254 {
f -= 161
ix := f*big5Font + c
if ix < len(p.big5map) {
r = p.big5map[ix]
}
if r == -1 {
r = utf8.RuneError
}
}
p.scratch = appendRune(p.scratch, r)
}
return n, p.scratch, nil
}
type big5Key bool
func fromBig5(arg string) (Translator, error) {
big5map, err := cache(big5Key(false), func() (interface{}, error) {
data, err := readFile(big5Data)
if err != nil {
return nil, fmt.Errorf("charset: cannot open big5 data file: %v", err)
}
big5map := []rune(string(data))
if len(big5map) != big5Max {
return nil, fmt.Errorf("charset: corrupt big5 data")
}
return big5map, nil
})
if err != nil {
return nil, err
}
return &translateFromBig5{big5map: big5map.([]rune), font: -1}, nil
}


@ -0,0 +1,301 @@
// The charset package implements translation between character sets.
// It uses Unicode as the intermediate representation.
// Because it can be large, the character set data is separated
// from the charset package. It can be embedded in the Go
// executable by importing the data package:
//
// import _ "github.com/paulrosania/go-charset/data"
//
// It can also be made available in a data directory (by setting CharsetDir).
package charset
import (
"io"
"strings"
"unicode/utf8"
)
// Charset holds information about a given character set.
type Charset struct {
Name string // Canonical name of character set.
Aliases []string // Known aliases.
Desc string // Description.
NoFrom bool // Not possible to translate from this charset.
NoTo bool // Not possible to translate to this charset.
}
// Translator represents a character set converter.
// The Translate method translates the given data,
// and returns the number of bytes of data consumed,
// a slice containing the converted data (which may be
// overwritten on the next call to Translate), and any
// conversion error. If eof is true, the data represents
// the final bytes of the input.
type Translator interface {
Translate(data []byte, eof bool) (n int, cdata []byte, err error)
}
// A Factory can be used to make character set translators.
type Factory interface {
// TranslatorFrom creates a translator that will translate from the named character
// set to UTF-8.
TranslatorFrom(name string) (Translator, error) // Create a Translator from this character set to UTF-8.
// TranslatorTo creates a translator that will translate from UTF-8 to the named character set.
TranslatorTo(name string) (Translator, error) // Create a Translator To this character set.
// Names returns all the character set names accessible through the factory.
Names() []string
// Info returns information on the named character set. It returns nil if the
// factory doesn't recognise the given name.
Info(name string) *Charset
}
var factories = []Factory{localFactory{}}
// Register registers a new Factory which will be consulted when NewReader
// or NewWriter needs a character set translator for a given name.
func Register(factory Factory) {
factories = append(factories, factory)
}
// NewReader returns a new Reader that translates from the named
// character set to UTF-8 as it reads r.
func NewReader(charset string, r io.Reader) (io.Reader, error) {
tr, err := TranslatorFrom(charset)
if err != nil {
return nil, err
}
return NewTranslatingReader(r, tr), nil
}
// NewWriter returns a new WriteCloser writing to w. It converts writes
// of UTF-8 text into writes on w of text in the named character set.
// The Close is necessary to flush any remaining partially translated
// characters to the output.
func NewWriter(charset string, w io.Writer) (io.WriteCloser, error) {
tr, err := TranslatorTo(charset)
if err != nil {
return nil, err
}
return NewTranslatingWriter(w, tr), nil
}
// Info returns information about a character set, or nil
// if the character set is not found.
func Info(name string) *Charset {
for _, f := range factories {
if info := f.Info(name); info != nil {
return info
}
}
return nil
}
// Names returns the canonical names of all supported character sets, in alphabetical order.
func Names() []string {
// TODO eliminate duplicates
var names []string
for _, f := range factories {
names = append(names, f.Names()...)
}
return names
}
// TranslatorFrom returns a translator that will translate from
// the named character set to UTF-8.
func TranslatorFrom(charset string) (Translator, error) {
var err error
var tr Translator
for _, f := range factories {
tr, err = f.TranslatorFrom(charset)
if err == nil {
break
}
}
if tr == nil {
return nil, err
}
return tr, nil
}
// TranslatorTo returns a translator that will translate from UTF-8
// to the named character set.
func TranslatorTo(charset string) (Translator, error) {
var err error
var tr Translator
for _, f := range factories {
tr, err = f.TranslatorTo(charset)
if err == nil {
break
}
}
if tr == nil {
return nil, err
}
return tr, nil
}
func normalizedChar(c rune) rune {
switch {
case c >= 'A' && c <= 'Z':
c = c - 'A' + 'a'
case c == '_':
c = '-'
}
return c
}
// NormalizedName returns s with all Roman capitals
// mapped to lower case, and '_' mapped to '-'.
func NormalizedName(s string) string {
return strings.Map(normalizedChar, s)
}
type translatingWriter struct {
w io.Writer
tr Translator
buf []byte // unconsumed data from writer.
}
// NewTranslatingWriter returns a new WriteCloser writing to w.
// It passes the written bytes through the given Translator.
func NewTranslatingWriter(w io.Writer, tr Translator) io.WriteCloser {
return &translatingWriter{w: w, tr: tr}
}
func (w *translatingWriter) Write(data []byte) (rn int, rerr error) {
wdata := data
if len(w.buf) > 0 {
w.buf = append(w.buf, data...)
wdata = w.buf
}
n, cdata, err := w.tr.Translate(wdata, false)
if err != nil {
// TODO
}
if n > 0 {
_, err = w.w.Write(cdata)
if err != nil {
return 0, err
}
}
w.buf = w.buf[:0]
if n < len(wdata) {
w.buf = append(w.buf, wdata[n:]...)
}
return len(data), nil
}
func (p *translatingWriter) Close() error {
for {
n, data, err := p.tr.Translate(p.buf, true)
p.buf = p.buf[n:]
if err != nil {
// TODO
}
// If the Translator produces no data
// at EOF, then assume that it never will.
if len(data) == 0 {
break
}
n, err = p.w.Write(data)
if err != nil {
return err
}
if n < len(data) {
return io.ErrShortWrite
}
if len(p.buf) == 0 {
break
}
}
return nil
}
type translatingReader struct {
r io.Reader
tr Translator
cdata []byte // unconsumed data from converter.
rdata []byte // unconverted data from reader.
err error // final error from reader.
}
// NewTranslatingReader returns a new Reader that
// translates data using the given Translator as it reads r.
func NewTranslatingReader(r io.Reader, tr Translator) io.Reader {
return &translatingReader{r: r, tr: tr}
}
func (r *translatingReader) Read(buf []byte) (int, error) {
for {
if len(r.cdata) > 0 {
n := copy(buf, r.cdata)
r.cdata = r.cdata[n:]
return n, nil
}
if r.err == nil {
r.rdata = ensureCap(r.rdata, len(r.rdata)+len(buf))
n, err := r.r.Read(r.rdata[len(r.rdata):cap(r.rdata)])
// Guard against non-compliant Readers.
if n == 0 && err == nil {
err = io.EOF
}
r.rdata = r.rdata[0 : len(r.rdata)+n]
r.err = err
} else if len(r.rdata) == 0 {
break
}
nc, cdata, cvterr := r.tr.Translate(r.rdata, r.err != nil)
if cvterr != nil {
// TODO
}
r.cdata = cdata
// Ensure that we consume all bytes at eof
// if the converter refuses them.
if nc == 0 && r.err != nil {
nc = len(r.rdata)
}
// Copy unconsumed data to the start of the rdata buffer.
r.rdata = r.rdata[0:copy(r.rdata, r.rdata[nc:])]
}
return 0, r.err
}
// ensureCap returns s with a capacity of at least n bytes.
// If cap(s) < n, then it returns a new copy of s with the
// required capacity.
func ensureCap(s []byte, n int) []byte {
if n <= cap(s) {
return s
}
// logic adapted from appendslice1 in runtime
m := cap(s)
if m == 0 {
m = n
} else {
for {
if m < 1024 {
m += m
} else {
m += m / 4
}
if m >= n {
break
}
}
}
t := make([]byte, len(s), m)
copy(t, s)
return t
}
func appendRune(buf []byte, r rune) []byte {
n := len(buf)
buf = ensureCap(buf, n+utf8.UTFMax)
nu := utf8.EncodeRune(buf[n:n+utf8.UTFMax], r)
return buf[0 : n+nu]
}
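A short usage sketch of the reader API above (a hypothetical program; it assumes the data package is vendored and imported for its side effects, as the package comment suggests):

package main

import (
	"fmt"
	"io/ioutil"
	"strings"

	"github.com/paulrosania/go-charset/charset"
	// Imported only for its side effect of registering the embedded
	// data files, so CharsetDir never needs to be consulted.
	_ "github.com/paulrosania/go-charset/data"
)

func main() {
	// "café" encoded as ISO-8859-1 (0xE9 is é in Latin-1).
	latin1 := strings.NewReader("caf\xe9")
	r, err := charset.NewReader("iso-8859-1", latin1)
	if err != nil {
		panic(err)
	}
	out, err := ioutil.ReadAll(r)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%s\n", out) // café, now valid UTF-8
}

NewWriter composes the same way in the opposite direction: wrap the destination writer, write UTF-8 into it, and call Close so any partially translated bytes are flushed.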


@ -0,0 +1,133 @@
package charset
import (
"fmt"
"unicode/utf8"
)
func init() {
registerClass("cp", fromCodePage, toCodePage)
}
type translateFromCodePage struct {
byte2rune *[256]rune
scratch []byte
}
type cpKeyFrom string
type cpKeyTo string
func (p *translateFromCodePage) Translate(data []byte, eof bool) (int, []byte, error) {
p.scratch = ensureCap(p.scratch, len(data)*utf8.UTFMax)[:0]
buf := p.scratch
for _, x := range data {
r := p.byte2rune[x]
if r < utf8.RuneSelf {
buf = append(buf, byte(r))
continue
}
size := utf8.EncodeRune(buf[len(buf):cap(buf)], r)
buf = buf[0 : len(buf)+size]
}
return len(data), buf, nil
}
type toCodePageInfo struct {
rune2byte map[rune]byte
// same gives the number of runes at start of code page that map exactly to
// unicode.
same rune
}
type translateToCodePage struct {
toCodePageInfo
scratch []byte
}
func (p *translateToCodePage) Translate(data []byte, eof bool) (int, []byte, error) {
p.scratch = ensureCap(p.scratch, len(data))
buf := p.scratch[:0]
for i := 0; i < len(data); {
r := rune(data[i])
size := 1
if r >= utf8.RuneSelf {
r, size = utf8.DecodeRune(data[i:])
if size == 1 && !eof && !utf8.FullRune(data[i:]) {
return i, buf, nil
}
}
var b byte
if r < p.same {
b = byte(r)
} else {
var ok bool
b, ok = p.rune2byte[r]
if !ok {
b = '?'
}
}
buf = append(buf, b)
i += size
}
return len(data), buf, nil
}
func fromCodePage(arg string) (Translator, error) {
runes, err := cache(cpKeyFrom(arg), func() (interface{}, error) {
data, err := readFile(arg)
if err != nil {
return nil, err
}
runes := []rune(string(data))
if len(runes) != 256 {
return nil, fmt.Errorf("charset: %q has wrong rune count (%d)", arg, len(runes))
}
r := new([256]rune)
copy(r[:], runes)
return r, nil
})
if err != nil {
return nil, err
}
return &translateFromCodePage{byte2rune: runes.(*[256]rune)}, nil
}
func toCodePage(arg string) (Translator, error) {
m, err := cache(cpKeyTo(arg), func() (interface{}, error) {
data, err := readFile(arg)
if err != nil {
return nil, err
}
info := toCodePageInfo{
rune2byte: make(map[rune]byte),
same: 256,
}
atStart := true
i := rune(0)
for _, r := range string(data) {
if atStart {
if r == i {
i++
continue
}
info.same = i
atStart = false
}
info.rune2byte[r] = byte(i)
i++
}
// TODO fix tables
// fmt.Printf("%s, same = %d\n", arg, info.same)
if i != 256 {
return nil, fmt.Errorf("charset: %q has wrong rune count (%d)", arg, i)
}
return info, nil
})
if err != nil {
return nil, err
}
return &translateToCodePage{toCodePageInfo: m.(toCodePageInfo)}, nil
}


@ -0,0 +1,195 @@
package charset
import (
"fmt"
"unicode/utf8"
)
func init() {
registerClass("cp932", fromCP932, nil)
}
// encoding details
// (Traditional) Shift-JIS
//
// 00..1f control characters
// 20 space
// 21..7f JIS X 0201:1976/1997 roman (see notes)
// 80 undefined
// 81..9f lead byte of JIS X 0208-1983 or JIS X 0202:1990/1997
// a0 undefined
// a1..df JIS X 0201:1976/1997 katakana
// e0..ea lead byte of JIS X 0208-1983 or JIS X 0202:1990/1997
// eb..ff undefined
//
// CP932 (windows-31J)
//
// this encoding scheme extends Shift-JIS in the following way
//
// eb..ec undefined (marked as lead bytes - see notes below)
// ed..ee lead byte of NEC-selected IBM extended characters
// ef undefined (marked as lead byte - see notes below)
// f0..f9 lead byte of User defined GAIJI (see note below)
// fa..fc lead byte of IBM extended characters
// fd..ff undefined
//
//
// Notes
//
// JISX 0201:1976/1997 roman
// this is the same as ASCII but with 0x5c (ASCII code for '\')
// representing the Yen currency symbol '¥' (U+00a5)
// This mapping is contentious; some conversion packages implement it,
// others do not.
// The mapping files from The Unicode Consortium show cp932 mapping
// plain ascii in the range 00..7f whereas shift-jis maps 0x5c ('\') to the yen
// symbol (¥) and 0x7e ('~') to overline (¯)
//
// CP932 double-byte character codes:
//
// eb-ec, ef, f0-f9:
// Marked as DBCS LEAD BYTEs in the unicode mapping data
// obtained from:
// https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT
//
// but there are no defined mappings for codes in this range.
// It is not clear whether or not an implementation should
// consume one or two bytes before emitting an error char.
const (
kanaPages = 1
kanaPageSize = 63
kanaChar0 = 0xa1
cp932Pages = 45 // 81..84, 87..9f, e0..ea, ed..ee, fa..fc
cp932PageSize = 189 // 40..fc (including 7f)
cp932Char0 = 0x40
)
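// A worked example of the lead-byte arithmetic below: for the CP932 byte
// pair 0x82 0x60, the lead byte 0x82 is the second code in the 0x81..0x84
// range, so dbcsoff[0x82] == 1; the trail byte gives
// ix = 0x60 - cp932Char0 = 0x20, and the rune is read from
// cp932[1*cp932PageSize+0x20].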
type jisTables struct {
page0 [256]rune
dbcsoff [256]int
cp932 []rune
}
type translateFromCP932 struct {
tables *jisTables
scratch []byte
}
func (p *translateFromCP932) Translate(data []byte, eof bool) (int, []byte, error) {
tables := p.tables
p.scratch = p.scratch[:0]
n := 0
for i := 0; i < len(data); i++ {
b := data[i]
r := tables.page0[b]
if r != -1 {
p.scratch = appendRune(p.scratch, r)
n++
continue
}
// DBCS
i++
if i >= len(data) {
break
}
pnum := tables.dbcsoff[b]
ix := int(data[i]) - cp932Char0
if pnum == -1 || ix < 0 || ix >= cp932PageSize {
r = utf8.RuneError
} else {
r = tables.cp932[pnum*cp932PageSize+ix]
}
p.scratch = appendRune(p.scratch, r)
n += 2
}
return n, p.scratch, nil
}
type cp932Key bool
func fromCP932(arg string) (Translator, error) {
shiftJIS := arg == "shiftjis"
tables, err := cache(cp932Key(shiftJIS), func() (interface{}, error) {
tables := new(jisTables)
kana, err := jisGetMap("jisx0201kana.dat", kanaPageSize, kanaPages)
if err != nil {
return nil, err
}
tables.cp932, err = jisGetMap("cp932.dat", cp932PageSize, cp932Pages)
if err != nil {
return nil, err
}
// jisx0201kana is mapped into 0xA1..0xDF
for i := 0; i < kanaPageSize; i++ {
tables.page0[i+kanaChar0] = kana[i]
}
// 00..7f same as ascii in cp932
for i := rune(0); i < 0x7f; i++ {
tables.page0[i] = i
}
if shiftJIS {
// shift-jis uses JIS X 0201 for the ASCII range
// this is the same as ASCII apart from
// 0x5c ('\') maps to yen symbol (¥) and 0x7e ('~') maps to overline (¯)
tables.page0['\\'] = '¥'
tables.page0['~'] = '¯'
}
// pre-calculate DBCS page numbers to mapping file page numbers
// and mark codes in page0 that are DBCS lead bytes
pnum := 0
for i := 0x81; i <= 0x84; i++ {
tables.page0[i] = -1
tables.dbcsoff[i] = pnum
pnum++
}
for i := 0x87; i <= 0x9f; i++ {
tables.page0[i] = -1
tables.dbcsoff[i] = pnum
pnum++
}
for i := 0xe0; i <= 0xea; i++ {
tables.page0[i] = -1
tables.dbcsoff[i] = pnum
pnum++
}
if shiftJIS {
return tables, nil
}
// add in cp932 extensions
for i := 0xed; i <= 0xee; i++ {
tables.page0[i] = -1
tables.dbcsoff[i] = pnum
pnum++
}
for i := 0xfa; i <= 0xfc; i++ {
tables.page0[i] = -1
tables.dbcsoff[i] = pnum
pnum++
}
return tables, nil
})
if err != nil {
return nil, err
}
return &translateFromCP932{tables: tables.(*jisTables)}, nil
}
func jisGetMap(name string, pgsize, npages int) ([]rune, error) {
data, err := readFile(name)
if err != nil {
return nil, err
}
m := []rune(string(data))
if len(m) != pgsize*npages {
return nil, fmt.Errorf("%q: incorrect length data", name)
}
return m, nil
}


@ -0,0 +1,40 @@
package charset
import (
"io"
"io/ioutil"
"os"
"path/filepath"
)
var files = make(map[string]func() (io.ReadCloser, error))
// RegisterDataFile registers the existence of a given data
// file with the given name that may be used by a character-set converter.
// It is intended to be used by packages that wish to embed
// data in the executable binary, and should not be
// used normally.
func RegisterDataFile(name string, open func() (io.ReadCloser, error)) {
files[name] = open
}
// CharsetDir gives the location of the default data file directory.
// This directory will be used for files with names that have not
// been registered with RegisterDataFile.
var CharsetDir = "/usr/local/lib/go-charset/datafiles"
func readFile(name string) (data []byte, err error) {
var r io.ReadCloser
if open := files[name]; open != nil {
r, err = open()
if err != nil {
return
}
} else {
r, err = os.Open(filepath.Join(CharsetDir, name))
if err != nil {
return
}
}
return ioutil.ReadAll(r)
}
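RegisterDataFile is exactly the hook the generated files later in this diff use; a minimal hand-written sketch (the file name and payload here are hypothetical):

package data

import (
	"io"
	"io/ioutil"
	"strings"

	"github.com/paulrosania/go-charset/charset"
)

func init() {
	// Serve "example.cp" from memory so readFile never falls back to CharsetDir.
	charset.RegisterDataFile("example.cp", func() (io.ReadCloser, error) {
		// Placeholder payload; a real code-page file holds 256 runes.
		return ioutil.NopCloser(strings.NewReader("...256 runes of code-page data...")), nil
	})
}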


@ -0,0 +1,184 @@
// The iconv package provides an interface to the GNU iconv character set
// conversion library (see http://www.gnu.org/software/libiconv/).
// It automatically registers all the character sets with the charset package,
// so it is usually used simply for the side effects of importing it.
// Example:
// import (
// "github.com/paulrosania/go-charset/charset"
// _ "github.com/paulrosania/go-charset/charset/iconv"
// )
package iconv
//#cgo darwin LDFLAGS: -liconv
//#include <stdlib.h>
//#include <iconv.h>
//#include <errno.h>
//iconv_t iconv_open_error = (iconv_t)-1;
//size_t iconv_error = (size_t)-1;
import "C"
import (
"errors"
"fmt"
"github.com/paulrosania/go-charset/charset"
"runtime"
"strings"
"syscall"
"unicode/utf8"
"unsafe"
)
type iconvTranslator struct {
cd C.iconv_t
invalid rune
scratch []byte
}
func canonicalChar(c rune) rune {
if c >= 'a' && c <= 'z' {
return c - 'a' + 'A'
}
return c
}
func canonicalName(s string) string {
return strings.Map(canonicalChar, s)
}
func init() {
charset.Register(iconvFactory{})
}
type iconvFactory struct {
}
func (iconvFactory) TranslatorFrom(name string) (charset.Translator, error) {
return Translator("UTF-8", name, utf8.RuneError)
}
func (iconvFactory) TranslatorTo(name string) (charset.Translator, error) {
// BUG This is wrong. The target character set may not be ASCII
// compatible. There's no easy solution to this other than
// removing the offending code point.
return Translator(name, "UTF-8", '?')
}
// Translator returns a Translator that translates between
// the named character sets. When an invalid multibyte
// character is found, the bytes in invalid are substituted instead.
func Translator(toCharset, fromCharset string, invalid rune) (charset.Translator, error) {
cto, cfrom := C.CString(toCharset), C.CString(fromCharset)
cd, err := C.iconv_open(cto, cfrom)
C.free(unsafe.Pointer(cfrom))
C.free(unsafe.Pointer(cto))
if cd == C.iconv_open_error {
if err == syscall.EINVAL {
return nil, errors.New("iconv: conversion not supported")
}
return nil, err
}
t := &iconvTranslator{cd: cd, invalid: invalid}
runtime.SetFinalizer(t, func(*iconvTranslator) {
C.iconv_close(cd)
})
return t, nil
}
func (iconvFactory) Names() []string {
all := aliases()
names := make([]string, 0, len(all))
for name, aliases := range all {
if aliases[0] == name {
names = append(names, name)
}
}
return names
}
func (iconvFactory) Info(name string) *charset.Charset {
name = strings.ToLower(name)
all := aliases()
a, ok := all[name]
if !ok {
return nil
}
return &charset.Charset{
Name: name,
Aliases: a,
}
}
func (p *iconvTranslator) Translate(data []byte, eof bool) (rn int, rd []byte, rerr error) {
n := 0
p.scratch = p.scratch[:0]
for len(data) > 0 {
p.scratch = ensureCap(p.scratch, len(p.scratch)+len(data)*utf8.UTFMax)
cData := (*C.char)(unsafe.Pointer(&data[:1][0]))
nData := C.size_t(len(data))
ns := len(p.scratch)
cScratch := (*C.char)(unsafe.Pointer(&p.scratch[ns : ns+1][0]))
nScratch := C.size_t(cap(p.scratch) - ns)
r, err := C.iconv(p.cd, &cData, &nData, &cScratch, &nScratch)
p.scratch = p.scratch[0 : cap(p.scratch)-int(nScratch)]
n += len(data) - int(nData)
data = data[len(data)-int(nData):]
if r != C.iconv_error || err == nil {
return n, p.scratch, nil
}
switch err := err.(syscall.Errno); err {
case C.EILSEQ:
// invalid multibyte sequence - skip one byte and continue
p.scratch = appendRune(p.scratch, p.invalid)
n++
data = data[1:]
case C.EINVAL:
// incomplete multibyte sequence
return n, p.scratch, nil
case C.E2BIG:
// output buffer not large enough; try again with larger buffer.
p.scratch = ensureCap(p.scratch, cap(p.scratch)+utf8.UTFMax)
default:
panic(fmt.Sprintf("unexpected error code: %v", err))
}
}
return n, p.scratch, nil
}
// ensureCap returns s with a capacity of at least n bytes.
// If cap(s) < n, then it returns a new copy of s with the
// required capacity.
func ensureCap(s []byte, n int) []byte {
if n <= cap(s) {
return s
}
// logic adapted from appendslice1 in runtime
m := cap(s)
if m == 0 {
m = n
} else {
for {
if m < 1024 {
m += m
} else {
m += m / 4
}
if m >= n {
break
}
}
}
t := make([]byte, len(s), m)
copy(t, s)
return t
}
func appendRune(buf []byte, r rune) []byte {
n := len(buf)
buf = ensureCap(buf, n+utf8.UTFMax)
nu := utf8.EncodeRune(buf[n:n+utf8.UTFMax], r)
return buf[0 : n+nu]
}
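Besides registering an iconv-backed Factory as a side effect, the package can be used directly through Translator; a hedged sketch, assuming libiconv on the host recognises both names:

package main

import (
	"fmt"
	"unicode/utf8"

	"github.com/paulrosania/go-charset/charset/iconv"
)

func main() {
	// to = UTF-8, from = ISO-8859-1; invalid input is replaced with utf8.RuneError.
	tr, err := iconv.Translator("UTF-8", "ISO-8859-1", utf8.RuneError)
	if err != nil {
		panic(err)
	}
	n, out, err := tr.Translate([]byte("caf\xe9"), true)
	fmt.Println(n, string(out), err) // expected: 4 café <nil>
}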


@ -0,0 +1,80 @@
// +build !linux
// This file is systemdependent because not all versions
// of iconv have the iconvlist function.
package iconv
//#cgo darwin LDFLAGS: -liconv
//#cgo freebsd LDFLAGS: -liconv
//#cgo windows LDFLAGS: -liconv
//#include <stdlib.h>
//#include <string.h>
//#include <iconv.h>
//#include <errno.h>
//
//typedef struct nameList nameList;
//struct nameList {
// int n;
// char **names;
// nameList *next;
//};
//
//int
//addNames(unsigned int n, const char *const *names, void *data) {
// // we can't call back to Go because of the stack size issue,
// // so copy all the names.
// nameList *hd, *e;
// int i;
//
// hd = data;
// e = malloc(sizeof(nameList));
// e->n = n;
// e->names = malloc(sizeof(char*) * n);
// for(i = 0; i < n; i++){
// e->names[i] = strdup(names[i]);
// }
// e->next = hd->next;
// hd->next = e;
// return 0;
//}
//
//nameList *
//listNames(void) {
// nameList hd;
// hd.next = 0;
// iconvlist(addNames, &hd);
// return hd.next;
//}
import "C"
import (
"strings"
"sync"
"unsafe"
)
var getAliasesOnce sync.Once
var allAliases = map[string][]string{}
func aliases() map[string][]string {
getAliasesOnce.Do(getAliases)
return allAliases
}
func getAliases() {
var next *C.nameList
for p := C.listNames(); p != nil; p = next {
next = p.next
aliases := make([]string, p.n)
pnames := (*[1e9]*C.char)(unsafe.Pointer(p.names))
for i := range aliases {
aliases[i] = strings.ToLower(C.GoString(pnames[i]))
C.free(unsafe.Pointer(pnames[i]))
}
C.free(unsafe.Pointer(p.names))
C.free(unsafe.Pointer(p))
for _, alias := range aliases {
allAliases[alias] = aliases
}
}
}


@ -0,0 +1,176 @@
// +build linux
// We just use a list of names obtained from iconv on a platform
// that allows iconvlist. We could invoke the iconv command,
// but that might fail too, and it gives no information about aliases.
package iconv
import (
"sync"
)
func aliases() map[string][]string {
initAliasesOnce.Do(initAliases)
return allAliases
}
var initAliasesOnce sync.Once
var allAliases map[string][]string
func initAliases() {
allAliases = make(map[string][]string)
for _, a := range aliasData {
for _, alias := range a {
allAliases[alias] = a
}
}
}
var aliasData = [][]string{
{"437", "cp437", "ibm437", "cspc8codepage437"},
{"850", "cp850", "ibm850", "cspc850multilingual"},
{"852", "cp852", "ibm852", "cspcp852"},
{"855", "cp855", "ibm855", "csibm855"},
{"857", "cp857", "ibm857", "csibm857"},
{"860", "cp860", "ibm860", "csibm860"},
{"861", "cp-is", "cp861", "ibm861", "csibm861"},
{"862", "cp862", "ibm862", "cspc862latinhebrew"},
{"863", "cp863", "ibm863", "csibm863"},
{"865", "cp865", "ibm865", "csibm865"},
{"866", "cp866", "ibm866", "csibm866"},
{"869", "cp-gr", "cp869", "ibm869", "csibm869"},
{"ansi-x3.4-1968", "ansi-x3.4-1986", "ascii", "cp367", "ibm367", "iso-ir-6", "iso646-us", "iso-646.irv:1991", "us", "us-ascii", "csascii"},
{"arabic", "asmo-708", "ecma-114", "iso-8859-6", "iso-ir-127", "iso8859-6", "iso-8859-6", "iso-8859-6:1987", "csisolatinarabic"},
{"armscii-8"},
{"atari", "atarist"},
{"big5-2003"},
{"big-5", "big-five", "big5", "bigfive", "cn-big5", "csbig5"},
{"big5-hkscs:1999"},
{"big5-hkscs:2001"},
{"big5-hkscs", "big5-hkscs:2004", "big5hkscs"},
{"c99"},
{"chinese", "gb-2312-80", "iso-ir-58", "csiso58gb231280"},
{"cn", "gb-1988-80", "iso-ir-57", "iso646-cn", "csiso57gb1988"},
{"cn-gb", "euc-cn", "euccn", "gb2312", "csgb2312"},
{"cn-gb-isoir165", "iso-ir-165"},
{"cp1046"},
{"cp1124"},
{"cp1125"},
{"cp1129"},
{"cp1131"},
{"cp1133", "ibm-cp1133"},
{"cp1161", "ibm-1161", "ibm1161", "csibm1161"},
{"cp1162", "ibm-1162", "ibm1162", "csibm1162"},
{"cp1163", "ibm-1163", "ibm1163", "csibm1163"},
{"cp1250", "ms-ee", "windows-1250"},
{"cp1251", "ms-cyrl", "windows-1251"},
{"cp1252", "ms-ansi", "windows-1252"},
{"cp1253", "ms-greek", "windows-1253"},
{"cp1254", "ms-turk", "windows-1254"},
{"cp1255", "ms-hebr", "windows-1255"},
{"cp1256", "ms-arab", "windows-1256"},
{"cp1257", "winbaltrim", "windows-1257"},
{"cp1258", "windows-1258"},
{"cp1361", "johab"},
{"cp154", "cyrillic-asian", "pt154", "ptcp154", "csptcp154"},
{"cp737"},
{"cp775", "ibm775", "cspc775baltic"},
{"cp819", "ibm819", "iso-8859-1", "iso-ir-100", "iso8859-1", "iso-8859-1", "iso-8859-1:1987", "l1", "latin1", "csisolatin1"},
{"cp853"},
{"cp856"},
{"cp858"},
{"cp864", "ibm864", "csibm864"},
{"cp874", "windows-874"},
{"cp922"},
{"cp932"},
{"cp936", "ms936", "windows-936"},
{"cp943"},
{"cp949", "uhc"},
{"cp950"},
{"cyrillic", "iso-8859-5", "iso-ir-144", "iso8859-5", "iso-8859-5", "iso-8859-5:1988", "csisolatincyrillic"},
{"dec-hanyu"},
{"dec-kanji"},
{"ecma-118", "elot-928", "greek", "greek8", "iso-8859-7", "iso-ir-126", "iso8859-7", "iso-8859-7", "iso-8859-7:1987", "iso-8859-7:2003", "csisolatingreek"},
{"euc-jis-2004", "euc-jisx0213"},
{"euc-jp", "eucjp", "extended-unix-code-packed-format-for-japanese", "cseucpkdfmtjapanese"},
{"euc-kr", "euckr", "cseuckr"},
{"euc-tw", "euctw", "cseuctw"},
{"gb18030"},
{"gbk"},
{"georgian-academy"},
{"georgian-ps"},
{"hebrew", "iso-8859-8", "iso-ir-138", "iso8859-8", "iso-8859-8", "iso-8859-8:1988", "csisolatinhebrew"},
{"hp-roman8", "r8", "roman8", "cshproman8"},
{"hz", "hz-gb-2312"},
{"iso-10646-ucs-2", "ucs-2", "csunicode"},
{"iso-10646-ucs-4", "ucs-4", "csucs4"},
{"iso-2022-cn", "csiso2022cn"},
{"iso-2022-cn-ext"},
{"iso-2022-jp-1"},
{"iso-2022-jp-2004", "iso-2022-jp-3"},
{"iso-2022-jp-2", "csiso2022jp2"},
{"iso-2022-jp", "csiso2022jp"},
{"iso-2022-kr", "csiso2022kr"},
{"iso-8859-10", "iso-ir-157", "iso8859-10", "iso-8859-10", "iso-8859-10:1992", "l6", "latin6", "csisolatin6"},
{"iso-8859-11", "iso8859-11", "iso-8859-11"},
{"iso-8859-13", "iso-ir-179", "iso8859-13", "iso-8859-13", "l7", "latin7"},
{"iso-8859-14", "iso-celtic", "iso-ir-199", "iso8859-14", "iso-8859-14", "iso-8859-14:1998", "l8", "latin8"},
{"iso-8859-15", "iso-ir-203", "iso8859-15", "iso-8859-15", "iso-8859-15:1998", "latin-9"},
{"iso-8859-16", "iso-ir-226", "iso8859-16", "iso-8859-16", "iso-8859-16:2001", "l10", "latin10"},
{"iso-8859-2", "iso-ir-101", "iso8859-2", "iso-8859-2", "iso-8859-2:1987", "l2", "latin2", "csisolatin2"},
{"iso-8859-3", "iso-ir-109", "iso8859-3", "iso-8859-3", "iso-8859-3:1988", "l3", "latin3", "csisolatin3"},
{"iso-8859-4", "iso-ir-110", "iso8859-4", "iso-8859-4", "iso-8859-4:1988", "l4", "latin4", "csisolatin4"},
{"iso-8859-9", "iso-ir-148", "iso8859-9", "iso-8859-9", "iso-8859-9:1989", "l5", "latin5", "csisolatin5"},
{"iso-ir-149", "korean", "ksc-5601", "ks-c-5601-1987", "ks-c-5601-1989", "csksc56011987"},
{"iso-ir-14", "iso646-jp", "jis-c6220-1969-ro", "jp", "csiso14jisc6220ro"},
{"iso-ir-159", "jis-x0212", "jis-x0212-1990", "jis-x0212.1990-0", "x0212", "csiso159jisx02121990"},
{"iso-ir-166", "tis-620", "tis620", "tis620-0", "tis620.2529-1", "tis620.2533-0", "tis620.2533-1"},
{"iso-ir-230", "tds565"},
{"iso-ir-87", "jis0208", "jis-c6226-1983", "jis-x0208", "jis-x0208-1983", "jis-x0208-1990", "x0208", "csiso87jisx0208"},
{"java"},
{"jisx0201-1976", "jis-x0201", "x0201", "cshalfwidthkatakana"},
{"koi8-r", "cskoi8r"},
{"koi8-ru"},
{"koi8-t"},
{"koi8-u"},
{"kz-1048", "rk1048", "strk1048-2002", "cskz1048"},
{"macarabic"},
{"maccentraleurope"},
{"maccroatian"},
{"maccyrillic"},
{"macgreek"},
{"machebrew"},
{"maciceland"},
{"mac", "macintosh", "macroman", "csmacintosh"},
{"macromania"},
{"macthai"},
{"macturkish"},
{"macukraine"},
{"ms-kanji", "shift-jis", "shift-jis", "sjis", "csshiftjis"},
{" MS-Windows", "Japanese", "(cp932)"},
{"mulelao-1"},
{"nextstep"},
{"riscos-latin1"},
{"shift-jis-2004", "shift-jisx0213"},
{"tcvn", "tcvn-5712", "tcvn5712-1", "tcvn5712-1:1993"},
{"ucs-2be", "unicode-1-1", "unicodebig", "csunicode11"},
{"ucs-2-internal"},
{"ucs-2le", "unicodelittle"},
{"ucs-2-swapped"},
{"ucs-4be"},
{"ucs-4-internal"},
{"ucs-4le"},
{"ucs-4-swapped"},
{"unicode-1-1-utf-7", "utf-7", "csunicode11utf7"},
{"utf-16"},
{"utf-16be"},
{"utf-16le"},
{"utf-32"},
{"utf-32be"},
{"utf-32le"},
{"utf-8"},
{"utf-8-mac", "utf8-mac"},
{"viscii", "viscii1.1-1", "csviscii"},
{"windows-31j", "cp932"},
}


@ -0,0 +1,162 @@
package charset
import (
"encoding/json"
"fmt"
"os"
"sync"
)
var (
readLocalCharsetsOnce sync.Once
localCharsets = make(map[string]*localCharset)
)
type localCharset struct {
Charset
arg string
*class
}
// A class of character sets.
// Each class can be instantiated with an argument specified in the config file.
// Many character sets can use a single class.
type class struct {
from, to func(arg string) (Translator, error)
}
// The set of classes, indexed by class name.
var classes = make(map[string]*class)
func registerClass(charset string, from, to func(arg string) (Translator, error)) {
classes[charset] = &class{from, to}
}
type localFactory struct{}
func (f localFactory) TranslatorFrom(name string) (Translator, error) {
f.init()
name = NormalizedName(name)
cs := localCharsets[name]
if cs == nil {
return nil, fmt.Errorf("character set %q not found", name)
}
if cs.from == nil {
return nil, fmt.Errorf("cannot translate from %q", name)
}
return cs.from(cs.arg)
}
func (f localFactory) TranslatorTo(name string) (Translator, error) {
f.init()
name = NormalizedName(name)
cs := localCharsets[name]
if cs == nil {
return nil, fmt.Errorf("character set %q not found", name)
}
if cs.to == nil {
return nil, fmt.Errorf("cannot translate to %q", name)
}
return cs.to(cs.arg)
}
func (f localFactory) Names() []string {
f.init()
var names []string
for name, cs := range localCharsets {
// add names only for non-aliases.
if localCharsets[cs.Name] == cs {
names = append(names, name)
}
}
return names
}
func (f localFactory) Info(name string) *Charset {
f.init()
lcs := localCharsets[NormalizedName(name)]
if lcs == nil {
return nil
}
// copy the charset info so that callers can't mess with it.
cs := lcs.Charset
return &cs
}
func (f localFactory) init() {
readLocalCharsetsOnce.Do(readLocalCharsets)
}
// charsetEntry is the data structure for one entry in the JSON config file.
// If Alias is non-empty, it should be the canonical name of another
// character set; otherwise Class should be the name
// of an entry in classes, and Arg is the argument for
// instantiating it.
type charsetEntry struct {
Aliases []string
Desc string
Class string
Arg string
}
// readLocalCharsets reads the JSON config file.
// It's done once only, when first needed.
func readLocalCharsets() {
csdata, err := readFile("charsets.json")
if err != nil {
fmt.Fprintf(os.Stderr, "charset: cannot open \"charsets.json\": %v\n", err)
return
}
var entries map[string]charsetEntry
err = json.Unmarshal(csdata, &entries)
if err != nil {
fmt.Fprintf(os.Stderr, "charset: cannot decode config file: %v\n", err)
}
for name, e := range entries {
class := classes[e.Class]
if class == nil {
continue
}
name = NormalizedName(name)
for i, a := range e.Aliases {
e.Aliases[i] = NormalizedName(a)
}
cs := &localCharset{
Charset: Charset{
Name: name,
Aliases: e.Aliases,
Desc: e.Desc,
NoFrom: class.from == nil,
NoTo: class.to == nil,
},
arg: e.Arg,
class: class,
}
localCharsets[cs.Name] = cs
for _, a := range cs.Aliases {
localCharsets[a] = cs
}
}
}
// A general cache store that local character set translators
// can use for persistent storage of data.
var (
cacheMutex sync.Mutex
cacheStore = make(map[interface{}]interface{})
)
func cache(key interface{}, f func() (interface{}, error)) (interface{}, error) {
cacheMutex.Lock()
defer cacheMutex.Unlock()
if x := cacheStore[key]; x != nil {
return x, nil
}
x, err := f()
if err != nil {
return nil, err
}
cacheStore[key] = x
return x, err
}
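To make the charsetEntry shape concrete, here is a sketch of what readLocalCharsets unmarshals (the two entries are copied from the generated charsets.json further down in this diff; the exampleDecode name is hypothetical):

package charset

import "encoding/json"

const sampleConfig = `{
"us-ascii": {"Aliases": ["ascii"], "Desc": "US-ASCII (RFC20)", "Class": "ascii"},
"ibm437": {"Aliases": ["437", "cp437"], "Desc": "IBM PC: CP 437", "Class": "cp", "Arg": "ibm437.cp"}
}`

func exampleDecode() (map[string]charsetEntry, error) {
	var entries map[string]charsetEntry
	err := json.Unmarshal([]byte(sampleConfig), &entries)
	// "Class" selects a converter registered via registerClass ("ascii", "cp", ...);
	// "Arg" is the argument handed to that class's constructor, e.g. a data file name.
	return entries, err
}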


@ -0,0 +1,110 @@
package charset
import (
"encoding/binary"
"errors"
"unicode/utf8"
)
func init() {
registerClass("utf16", fromUTF16, toUTF16)
}
type translateFromUTF16 struct {
first bool
endian binary.ByteOrder
scratch []byte
}
func (p *translateFromUTF16) Translate(data []byte, eof bool) (int, []byte, error) {
data = data[0 : len(data)&^1] // round to even number of bytes.
if len(data) < 2 {
return 0, nil, nil
}
n := 0
if p.first && p.endian == nil {
switch binary.BigEndian.Uint16(data) {
case 0xfeff:
p.endian = binary.BigEndian
data = data[2:]
n += 2
case 0xfffe:
p.endian = binary.LittleEndian
data = data[2:]
n += 2
default:
p.endian = guessEndian(data)
}
p.first = false
}
p.scratch = p.scratch[:0]
for ; len(data) > 0; data = data[2:] {
p.scratch = appendRune(p.scratch, rune(p.endian.Uint16(data)))
n += 2
}
return n, p.scratch, nil
}
func guessEndian(data []byte) binary.ByteOrder {
// XXX TODO
return binary.LittleEndian
}
type translateToUTF16 struct {
first bool
endian binary.ByteOrder
scratch []byte
}
func (p *translateToUTF16) Translate(data []byte, eof bool) (int, []byte, error) {
p.scratch = ensureCap(p.scratch[:0], (len(data)+1)*2)
if p.first {
p.scratch = p.scratch[0:2]
p.endian.PutUint16(p.scratch, 0xfeff)
p.first = false
}
n := 0
for len(data) > 0 {
if !utf8.FullRune(data) && !eof {
break
}
r, size := utf8.DecodeRune(data)
// TODO if r > 65535?
slen := len(p.scratch)
p.scratch = p.scratch[0 : slen+2]
p.endian.PutUint16(p.scratch[slen:], uint16(r))
data = data[size:]
n += size
}
return n, p.scratch, nil
}
func getEndian(arg string) (binary.ByteOrder, error) {
switch arg {
case "le":
return binary.LittleEndian, nil
case "be":
return binary.BigEndian, nil
case "":
return nil, nil
}
return nil, errors.New("charset: unknown utf16 endianness")
}
func fromUTF16(arg string) (Translator, error) {
endian, err := getEndian(arg)
if err != nil {
return nil, err
}
return &translateFromUTF16{first: true, endian: endian}, nil
}
func toUTF16(arg string) (Translator, error) {
endian, err := getEndian(arg)
if err != nil {
return nil, err
}
return &translateToUTF16{first: false, endian: endian}, nil
}
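A quick sketch of the BOM handling in translateFromUTF16 above (it assumes the data package is imported so the "utf-16" entry from charsets.json resolves):

package main

import (
	"fmt"

	"github.com/paulrosania/go-charset/charset"
	_ "github.com/paulrosania/go-charset/data"
)

func main() {
	tr, err := charset.TranslatorFrom("utf-16")
	if err != nil {
		panic(err)
	}
	// 0xFFFE read big-endian marks a little-endian stream: the BOM is
	// consumed, then each remaining pair decodes to a rune ('h', 'i').
	n, out, err := tr.Translate([]byte{0xff, 0xfe, 'h', 0, 'i', 0}, true)
	fmt.Println(n, string(out), err) // expected: 6 hi <nil>
}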


@ -0,0 +1,51 @@
package charset
import (
"unicode/utf8"
)
func init() {
registerClass("utf8", toUTF8, toUTF8)
}
type translateToUTF8 struct {
scratch []byte
}
var errorBytes = []byte(string(utf8.RuneError))
const errorRuneLen = len(string(utf8.RuneError))
func (p *translateToUTF8) Translate(data []byte, eof bool) (int, []byte, error) {
p.scratch = ensureCap(p.scratch, (len(data))*errorRuneLen)
buf := p.scratch[:0]
for i := 0; i < len(data); {
// fast path for ASCII
if b := data[i]; b < utf8.RuneSelf {
buf = append(buf, b)
i++
continue
}
_, size := utf8.DecodeRune(data[i:])
if size == 1 {
if !eof && !utf8.FullRune(data[i:]) {
// When DecodeRune has converted only a single
// byte, we know there must be some kind of error
// because we know the byte's not ASCII.
// If we aren't at EOF, and it's an incomplete
// rune encoding, then we return to process
// the final bytes in a subsequent call.
return i, buf, nil
}
buf = append(buf, errorBytes...)
} else {
buf = append(buf, data[i:i+size]...)
}
i += size
}
return len(data), buf, nil
}
func toUTF8(arg string) (Translator, error) {
return new(translateToUTF8), nil
}

vendor/github.com/paulrosania/go-charset/cmd/tcs/tcs.go (103 lines, generated, vendored, new file)

@ -0,0 +1,103 @@
package main
import (
"bytes"
"flag"
"fmt"
"github.com/paulrosania/go-charset/charset"
_ "github.com/paulrosania/go-charset/charset/iconv"
"io"
"os"
"strings"
)
var listFlag = flag.Bool("l", false, "list available character sets")
var verboseFlag = flag.Bool("v", false, "list more information")
var fromCharset = flag.String("f", "utf-8", "translate from this character set")
var toCharset = flag.String("t", "utf-8", "translate to this character set")
func main() {
flag.Usage = func() {
fmt.Fprintf(os.Stderr, "usage: tcs [-l] [-v] [charset]\n")
fmt.Fprintf(os.Stderr, "\ttcs [-f charset] [-t charset] [file]\n")
}
flag.Parse()
if *listFlag {
cs := ""
switch flag.NArg() {
case 1:
cs = flag.Arg(0)
case 0:
default:
flag.Usage()
}
listCharsets(*verboseFlag, cs)
return
}
var f *os.File
switch flag.NArg() {
case 0:
f = os.Stdin
case 1:
var err error
f, err = os.Open(flag.Arg(0))
if err != nil {
fatalf("cannot open %q: %v", flag.Arg(0), err)
}
}
r, err := charset.NewReader(*fromCharset, f)
if err != nil {
fatalf("cannot translate from %q: %v", *fromCharset, err)
}
w, err := charset.NewWriter(*toCharset, os.Stdout)
if err != nil {
fatalf("cannot translate to %q: %v", *toCharset, err)
}
_, err = io.Copy(w, r)
if err != nil {
fatalf("%v", err)
}
}
func listCharsets(verbose bool, csname string) {
var buf bytes.Buffer
if !verbose {
if csname != "" {
cs := charset.Info(csname)
if cs == nil {
fatalf("no such charset %q", csname)
}
fmt.Fprintf(&buf, "%s %s\n", cs.Name, strings.Join(cs.Aliases, " "))
} else {
fmt.Fprintf(&buf, "%v\n", strings.Join(charset.Names(), " "))
}
} else {
var charsets []*charset.Charset
if csname != "" {
cs := charset.Info(csname)
if cs == nil {
fatalf("no such charset %q", csname)
}
charsets = []*charset.Charset{cs}
} else {
for _, name := range charset.Names() {
if cs := charset.Info(name); cs != nil {
charsets = append(charsets, cs)
}
}
}
for _, cs := range charsets {
fmt.Fprintf(&buf, "%s %s\n", cs.Name, strings.Join(cs.Aliases, " "))
if cs.Desc != "" {
fmt.Fprintf(&buf, "\t%s\n", cs.Desc)
}
}
}
os.Stdout.Write(buf.Bytes())
}
func fatalf(f string, a ...interface{}) {
s := fmt.Sprintf(f, a...)
fmt.Fprintf(os.Stderr, "%s\n", s)
os.Exit(2)
}
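Two hypothetical invocations, given the flags defined above: "tcs -l -v" prints every known character set with its aliases and description, while "tcs -f iso-8859-1 somefile.txt" copies the (hypothetical) file to stdout re-encoded as UTF-8, the default for -t.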

File diff suppressed because one or more lines are too long


@ -0,0 +1,18 @@
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.
package data
import (
"github.com/paulrosania/go-charset/charset"
"io"
"io/ioutil"
"strings"
)
func init() {
charset.RegisterDataFile("charsets.json", func() (io.ReadCloser, error) {
r := strings.NewReader("{\n\"8bit\": {\n\t\"Desc\": \"raw 8-bit data\",\n\t\"Class\": \"8bit\",\n\t\"Comment\": \"special class for raw 8bit data that has been converted to utf-8\"\n},\n\"big5\": {\n\t\"Desc\": \"Big 5 (HKU)\",\n\t\"Class\": \"big5\",\n\t\"Comment\": \"Traditional Chinese\"\n},\n\"euc-jp\": {\n\t\"Aliases\":[\"x-euc-jp\"],\n\t\"Desc\": \"Japanese Extended UNIX Code\",\n\t\"Class\": \"euc-jp\"\n},\n\"gb2312\": {\n\t\"Aliases\":[\"iso-ir-58\", \"chinese\", \"gb_2312-80\"],\n\t\"Desc\": \"Chinese mixed one byte\",\n\t\"Class\": \"gb2312\"\n},\n\"ibm437\": {\n\t\"Aliases\":[\"437\", \"cp437\"],\n\t\"Desc\": \"IBM PC: CP 437\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"ibm437.cp\",\n\t\"Comment\": \"originally from jhelling@cs.ruu.nl (Jeroen Hellingman)\"\n},\n\"ibm850\": {\n\t\"Aliases\":[\"850\", \"cp850\"],\n\t\"Desc\": \"IBM PS/2: CP 850\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"ibm850.cp\",\n\t\"Comment\": \"originally from jhelling@cs.ruu.nl (Jeroen Hellingman)\"\n},\n\"ibm866\": {\n\t\"Aliases\":[\"cp866\", \"866\"],\n\t\"Desc\": \"Russian MS-DOS CP 866\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"ibm866.cp\"\n},\n\"iso-8859-1\": {\n\t\"Aliases\":[\"iso-ir-100\", \"ibm819\", \"l1\", \"iso8859-1\", \"iso-latin-1\", \"iso_8859-1:1987\", \"cp819\", \"iso_8859-1\", \"iso8859_1\", \"latin1\"],\n\t\"Desc\": \"Latin-1\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-1.cp\"\n},\n\"iso-8859-10\": {\n\t\"Aliases\":[\"iso_8859-10:1992\", \"l6\", \"iso-ir-157\", \"latin6\"],\n\t\"Desc\": \"Latin-6\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-10.cp\",\n\t\"Comment\": \"originally from dkuug.dk:i18n/charmaps/ISO_8859-10:1993\"\n},\n\"iso-8859-15\": {\n\t\"Aliases\":[\"l9-iso-8859-15\", \"latin9\"],\n\t\"Desc\": \"Latin-9\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-15.cp\"\n},\n\"iso-8859-2\": {\n\t\"Aliases\":[\"iso-ir-101\", \"iso_8859-2:1987\", \"l2\", \"iso_8859-2\", \"latin2\"],\n\t\"Desc\": \"Latin-2\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-2.cp\"\n},\n\"iso-8859-3\": {\n\t\"Aliases\":[\"iso-ir-109\", \"l3\", \"iso_8859-3:1988\", \"iso_8859-3\", \"latin3\"],\n\t\"Desc\": \"Latin-3\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-3.cp\"\n},\n\"iso-8859-4\": {\n\t\"Aliases\":[\"iso-ir-110\", \"iso_8859-4:1988\", \"l4\", \"iso_8859-4\", \"latin4\"],\n\t\"Desc\": \"Latin-4\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-4.cp\"\n},\n\"iso-8859-5\": {\n\t\"Aliases\":[\"cyrillic\", \"iso_8859-5\", \"iso-ir-144\", \"iso_8859-5:1988\"],\n\t\"Desc\": \"Part 5 (Cyrillic)\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-5.cp\"\n},\n\"iso-8859-6\": {\n\t\"Aliases\":[\"ecma-114\", \"iso_8859-6:1987\", \"arabic\", \"iso_8859-6\", \"asmo-708\", \"iso-ir-127\"],\n\t\"Desc\": \"Part 6 (Arabic)\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-6.cp\"\n},\n\"iso-8859-7\": {\n\t\"Aliases\":[\"greek8\", \"elot_928\", \"ecma-118\", \"greek\", \"iso_8859-7\", \"iso_8859-7:1987\", \"iso-ir-126\"],\n\t\"Desc\": \"Part 7 (Greek)\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-7.cp\"\n},\n\"iso-8859-8\": {\n\t\"Aliases\":[\"iso_8859-8:1988\", \"hebrew\", \"iso_8859-8\", \"iso-ir-138\"],\n\t\"Desc\": \"Part 8 (Hebrew)\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-8.cp\"\n},\n\"iso-8859-9\": {\n\t\"Aliases\":[\"l5\", \"iso_8859-9:1989\", \"iso_8859-9\", \"iso-ir-148\", \"latin5\"],\n\t\"Desc\": \"Latin-5\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-9.cp\"\n},\n\"koi8-r\": {\n\t\"Desc\": \"KOI8-R (RFC1489)\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"koi8-r.cp\"\n},\n\"shift_jis\": {\n\t\"Aliases\":[\"sjis\", 
\"ms_kanji\", \"x-sjis\"],\n\t\"Desc\": \"Shift-JIS Japanese\",\n\t\"Class\": \"cp932\",\n\t\"Arg\": \"shiftjis\"\n},\n\"us-ascii\": {\n\t\"Aliases\":[\"ascii\"],\n\t\"Desc\": \"US-ASCII (RFC20)\",\n\t\"Class\": \"ascii\"\n},\n\"utf-16\": {\n\t\"Aliases\":[\"utf16\"],\n\t\"Desc\": \"Unicode UTF-16\",\n\t\"Class\": \"utf16\"\n},\n\"utf-16be\": {\n\t\"Aliases\":[\"utf16be\"],\n\t\"Desc\": \"Unicode UTF-16 big endian\",\n\t\"Class\": \"utf16\",\n\t\"Arg\": \"be\"\n},\n\"utf-16le\": {\n\t\"Aliases\":[\"utf16le\"],\n\t\"Desc\": \"Unicode UTF-16 little endian\",\n\t\"Class\": \"utf16\",\n\t\"Arg\": \"le\"\n},\n\"utf-8\": {\n\t\"Aliases\":[\"utf8\"],\n\t\"Desc\": \"Unicode UTF-8\",\n\t\"Class\": \"utf8\"\n},\n\"windows-1250\": {\n\t\"Desc\": \"MS Windows CP 1250 (Central Europe)\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"windows-1250.cp\"\n},\n\"windows-1251\": {\n\t\"Desc\": \"MS Windows CP 1251 (Cyrillic)\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"windows-1251.cp\"\n},\n\"windows-1252\": {\n\t\"Desc\": \"MS Windows CP 1252 (Latin 1)\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"windows-1252.cp\"\n},\n\"windows-31j\": {\n\t\"Aliases\":[\"cp932\"],\n\t\"Desc\": \"MS-Windows Japanese (cp932)\",\n\t\"Class\": \"cp932\",\n\t\"Arg\": \"cp932\"\n}\n}\n")
return ioutil.NopCloser(r), nil
})
}

File diff suppressed because one or more lines are too long


@ -0,0 +1,18 @@
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.
package data
import (
"github.com/paulrosania/go-charset/charset"
"io"
"io/ioutil"
"strings"
)
func init() {
charset.RegisterDataFile("ibm437.cp", func() (io.ReadCloser, error) {
r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007fÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜ¢£¥₧ƒáíóúñѪº¿⌐¬½¼¡«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞∅∈∩≡±≥≤⌠⌡÷≈°•·√ⁿ²∎\u00a0")
return ioutil.NopCloser(r), nil
})
}


@ -0,0 +1,18 @@
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.
package data
import (
"github.com/paulrosania/go-charset/charset"
"io"
"io/ioutil"
"strings"
)
func init() {
charset.RegisterDataFile("ibm850.cp", func() (io.ReadCloser, error) {
r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007fÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜø£Ø×ƒáíóúñѪº¿®¬½¼¡«»░▒▓│┤ÁÂÀ©╣║╗╝¢¥┐└┴┬├─┼ãÃ╚╔╩╦╠═╬¤ðÐÊËÈıÍÎÏ┘┌█▄¦Ì▀ÓßÔÒõÕµþÞÚÛÙýݯ´\u00ad±‗¾¶§÷¸°¨·¹³²∎\u00a0")
return ioutil.NopCloser(r), nil
})
}


@ -0,0 +1,18 @@
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.
package data
import (
"github.com/paulrosania/go-charset/charset"
"io"
"io/ioutil"
"strings"
)
func init() {
charset.RegisterDataFile("ibm866.cp", func() (io.ReadCloser, error) {
r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007fАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмноп<D0BE><D0BF><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>рстуфхцчшщъыьэюяЁё<D081><D191><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>")
return ioutil.NopCloser(r), nil
})
}


@ -0,0 +1,18 @@
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.
package data
import (
"github.com/paulrosania/go-charset/charset"
"io"
"io/ioutil"
"strings"
)
func init() {
charset.RegisterDataFile("iso-8859-1.cp", func() (io.ReadCloser, error) {
r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0¡¢£¤¥¦§¨©ª«¬\u00ad®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ")
return ioutil.NopCloser(r), nil
})
}


@ -0,0 +1,18 @@
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.
package data
import (
"github.com/paulrosania/go-charset/charset"
"io"
"io/ioutil"
"strings"
)
func init() {
charset.RegisterDataFile("iso-8859-10.cp", func() (io.ReadCloser, error) {
r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0ĄĒĢĪĨͧĻĐŠŦŽ\u00adŪŊ°ąēģīĩķ·ļĐšŧž—ūŋĀÁÂÃÄÅÆĮČÉĘËĖÍÎÏÐŅŌÓÔÕÖŨØŲÚÛÜÝÞßāáâãäåæįčéęëėíîïðņōóôõöũøųúûüýþĸ")
return ioutil.NopCloser(r), nil
})
}


@ -0,0 +1,18 @@
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.
package data
import (
"github.com/paulrosania/go-charset/charset"
"io"
"io/ioutil"
"strings"
)
func init() {
charset.RegisterDataFile("iso-8859-15.cp", func() (io.ReadCloser, error) {
r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0¡¢£€¥Š§š©ª«¬\u00ad®¯°±²³Žµ¶·ž¹º»ŒœŸ¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ")
return ioutil.NopCloser(r), nil
})
}


@ -0,0 +1,18 @@
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.
package data
import (
"github.com/paulrosania/go-charset/charset"
"io"
"io/ioutil"
"strings"
)
func init() {
charset.RegisterDataFile("iso-8859-2.cp", func() (io.ReadCloser, error) {
r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0Ą˘Ł¤ĽŚ§¨ŠŞŤŹ\u00adŽŻ°ą˛ł´ľśˇ¸šşťź˝žżŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢßŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙")
return ioutil.NopCloser(r), nil
})
}


@ -0,0 +1,18 @@
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.
package data
import (
"github.com/paulrosania/go-charset/charset"
"io"
"io/ioutil"
"strings"
)
func init() {
charset.RegisterDataFile("iso-8859-3.cp", func() (io.ReadCloser, error) {
r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0Ħ˘£¤<C2A3>Ĥ§¨İŞĞĴ\u00ad<61>ݰħ²³´µĥ·¸ışğĵ½<C4B5>żÀÁÂ<C381>ÄĊĈÇÈÉÊËÌÍÎÏ<C38E>ÑÒÓÔĠÖ×ĜÙÚÛÜŬŜßàáâ<C3A1>äċĉçèéêëìíîï<C3AE>ñòóôġö÷ĝùúûüŭŝ˙")
return ioutil.NopCloser(r), nil
})
}


@ -0,0 +1,18 @@
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.
package data
import (
"github.com/paulrosania/go-charset/charset"
"io"
"io/ioutil"
"strings"
)
func init() {
charset.RegisterDataFile("iso-8859-4.cp", func() (io.ReadCloser, error) {
r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0ĄĸŖ¤Ĩϧ¨ŠĒĢŦ\u00adޝ°ą˛ŗ´ĩšēģŧŊžŋĀÁÂÃÄÅÆĮČÉĘËĖÍÎĪĐŅŌĶÔÕÖרŲÚÛÜŨŪßāáâãäåæįčéęëėíîīđņōķôõö÷øųúûüũū˙")
return ioutil.NopCloser(r), nil
})
}


@ -0,0 +1,18 @@
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.
package data
import (
"github.com/paulrosania/go-charset/charset"
"io"
"io/ioutil"
"strings"
)
func init() {
charset.RegisterDataFile("iso-8859-5.cp", func() (io.ReadCloser, error) {
r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0ЁЂЃЄЅІЇЈЉЊЋЌ\u00adЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя№ёђѓєѕіїјљњћќ§ўџ")
return ioutil.NopCloser(r), nil
})
}


@ -0,0 +1,18 @@
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.
package data
import (
"github.com/paulrosania/go-charset/charset"
"io"
"io/ioutil"
"strings"
)
func init() {
charset.RegisterDataFile("iso-8859-6.cp", func() (io.ReadCloser, error) {
r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0<61><30><EFBFBD>¤<EFBFBD><C2A4><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>،\u00ad<61><64><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>؛<EFBFBD><D89B><EFBFBD>؟<EFBFBD>ءآأؤإئابةتثجحخدذرزسشصضطظعغ<D8B9><D8BA><EFBFBD><EFBFBD><EFBFBD>ـفقكلمنهوىيًٌٍَُِّْ<D991><D992><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>")
return ioutil.NopCloser(r), nil
})
}


@ -0,0 +1,18 @@
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.
package data
import (
"github.com/paulrosania/go-charset/charset"
"io"
"io/ioutil"
"strings"
)
func init() {
charset.RegisterDataFile("iso-8859-7.cp", func() (io.ReadCloser, error) {
r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0£<E28099><C2A3>¦§¨©<C2A8>«¬\u00ad<61>―°±²³΄΅Ά·ΈΉΊ»Ό½ΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ<CEA0>ΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώ<CF8D>")
return ioutil.NopCloser(r), nil
})
}

View File

@ -0,0 +1,18 @@
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.
package data
import (
"github.com/paulrosania/go-charset/charset"
"io"
"io/ioutil"
"strings"
)
func init() {
charset.RegisterDataFile("iso-8859-8.cp", func() (io.ReadCloser, error) {
r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0<61>¢£¤¥¦§¨©×«¬\u00ad®‾°±²³´µ¶·¸¹÷»¼½¾<C2BD><C2BE><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>‗אבגדהוזחטיךכלםמןנסעףפץצקרשת<D7A9><D7AA><EFBFBD><EFBFBD><EFBFBD>")
return ioutil.NopCloser(r), nil
})
}

View File

@ -0,0 +1,18 @@
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.
package data
import (
"github.com/paulrosania/go-charset/charset"
"io"
"io/ioutil"
"strings"
)
func init() {
charset.RegisterDataFile("iso-8859-9.cp", func() (io.ReadCloser, error) {
r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0¡¢£¤¥¦§¨©ª«¬\u00ad®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏĞÑÒÓÔÕÖרÙÚÛÜİŞßàáâãäåæçèéêëìíîïğñòóôõö÷øùúûüışÿ")
return ioutil.NopCloser(r), nil
})
}

View File

@ -0,0 +1,18 @@
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.
package data
import (
"github.com/paulrosania/go-charset/charset"
"io"
"io/ioutil"
"strings"
)
func init() {
charset.RegisterDataFile("jisx0201kana.dat", func() (io.ReadCloser, error) {
r := strings.NewReader("。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚")
return ioutil.NopCloser(r), nil
})
}

View File

@ -0,0 +1,18 @@
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.
package data
import (
"github.com/paulrosania/go-charset/charset"
"io"
"io/ioutil"
"strings"
)
func init() {
charset.RegisterDataFile("koi8-r.cp", func() (io.ReadCloser, error) {
r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f─│┌┐└┘├┤┬┴┼▀▄█▌▐░▒▓⌠■∙√≈≤≥\u00a0⌡°²·÷═║╒ё╓╔╕╖╗╘╙╚╛╜╝╞╟╠╡Ё╢╣╤╥╦╧╨╩╪╫╬©юабцдефгхийклмнопярстужвьызшэщчъЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ")
return ioutil.NopCloser(r), nil
})
}

View File

@ -0,0 +1,18 @@
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.
package data
import (
"github.com/paulrosania/go-charset/charset"
"io"
"io/ioutil"
"strings"
)
func init() {
charset.RegisterDataFile("windows-1250.cp", func() (io.ReadCloser, error) {
r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f€<66><EFBFBD>„…†‡<E280A0>‰ŠŚŤŽŹ<C5BD>“”•<E28093>™šśťžź\u00a0ˇ˘Ł¤Ą¦§¨©Ş«¬\u00ad®Ż°±˛ł´µ¶·¸ąş»Ľ˝ľżŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢßŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙")
return ioutil.NopCloser(r), nil
})
}

View File

@ -0,0 +1,18 @@
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.
package data
import (
"github.com/paulrosania/go-charset/charset"
"io"
"io/ioutil"
"strings"
)
func init() {
charset.RegisterDataFile("windows-1251.cp", func() (io.ReadCloser, error) {
r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007fЂЃѓ„…†‡<E280A0>‰ЉЊЌЋЏђ“”•<E28093>™љњќћџ\u00a0ЎўЈ¤Ґ¦§Ё©Є«¬\u00ad®Ї°±Ііґµ¶·ё№є»јЅѕїАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя")
return ioutil.NopCloser(r), nil
})
}

View File

@ -0,0 +1,18 @@
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.
package data
import (
"github.com/paulrosania/go-charset/charset"
"io"
"io/ioutil"
"strings"
)
func init() {
charset.RegisterDataFile("windows-1252.cp", func() (io.ReadCloser, error) {
r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f€<66>ƒ„…†‡ˆ‰ŠŒ<E280B9>Ž<EFBFBD><C5BD>“”•˜™šœ<E280BA>žŸ\u00a0¡¢£¤¥¦§¨©ª«¬\u00ad®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ")
return ioutil.NopCloser(r), nil
})
}
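All of the init functions in these generated files funnel into charset.RegisterDataFile, whose signature can be read off the calls above. A simplified, hypothetical sketch of the registry pattern they rely on — not the vendored charset package's actual implementation; the package name, dataFiles and openDataFile are invented:

package charsetregistry // hypothetical stand-in, not the vendored package

import (
	"fmt"
	"io"
)

// dataFiles maps a data file name such as "koi8-r.cp" to a factory that can
// reopen its contents on demand.
var dataFiles = map[string]func() (io.ReadCloser, error){}

// RegisterDataFile records a named factory, mirroring what the generated
// init functions call in the charset package.
func RegisterDataFile(name string, open func() (io.ReadCloser, error)) {
	dataFiles[name] = open
}

// openDataFile is how a translator would later fetch a registered table.
func openDataFile(name string) (io.ReadCloser, error) {
	open, ok := dataFiles[name]
	if !ok {
		return nil, fmt.Errorf("charset data file %q not registered", name)
	}
	return open()
}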

6
vendor/github.com/paulrosania/go-charset/data/doc.go generated vendored Normal file
View File

@ -0,0 +1,6 @@
// The data package embeds all the charset
// data files as Go data. It registers the data with the charset
// package as a side effect of its import. To use:
//
// import _ "github.com/paulrosania/go-charset"
package data
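A minimal usage sketch to go with doc.go: importing the data package for its side effect and then translating a windows-1252 byte stream. It assumes the sibling charset package's NewReader(charsetName, reader) API and that "windows-1252" is among the registered names; it is not part of this commit:

package main

import (
	"fmt"
	"io/ioutil"
	"strings"

	"github.com/paulrosania/go-charset/charset"
	_ "github.com/paulrosania/go-charset/data" // side effect: registers the embedded tables
)

func main() {
	// "caf\xe9" is "café" encoded as windows-1252.
	r, err := charset.NewReader("windows-1252", strings.NewReader("caf\xe9"))
	if err != nil {
		panic(err)
	}
	out, err := ioutil.ReadAll(r)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out)) // café
}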

View File

@ -0,0 +1,97 @@
// +build ignore

// go run generate.go && go fmt

// The generate-charset-data command generates the Go source code
// for github.com/paulrosania/go-charset/data from the data files
// found in github.com/paulrosania/go-charset/datafiles.
// It should be run in the go-charset root directory.
// The resulting Go files will need gofmt'ing.
package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"text/template"
)

type info struct {
	Path string
}

var tfuncs = template.FuncMap{
	"basename": func(s string) string {
		return filepath.Base(s)
	},
	"read": func(path string) ([]byte, error) {
		return ioutil.ReadFile(path)
	},
}

var tmpl = template.Must(template.New("").Funcs(tfuncs).Parse(`
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.

package data

import (
	"github.com/paulrosania/go-charset/charset"
	"io"
	"io/ioutil"
	"strings"
)

func init() {
	charset.RegisterDataFile({{basename .Path | printf "%q"}}, func() (io.ReadCloser, error) {
		r := strings.NewReader({{read .Path | printf "%q"}})
		return ioutil.NopCloser(r), nil
	})
}
`))

var docTmpl = template.Must(template.New("").Funcs(tfuncs).Parse(`
// This file is automatically generated by generate-charset-data.
// Do not hand-edit.

// The {{basename .Package}} package embeds all the charset
// data files as Go data. It registers the data with the charset
// package as a side effect of its import. To use:
//
//	import _ "github.com/paulrosania/go-charset"
package {{basename .Package}}
`))

func main() {
	dataDir := filepath.Join("..", "datafiles")
	d, err := os.Open(dataDir)
	if err != nil {
		fatalf("%v", err)
	}
	names, err := d.Readdirnames(0)
	if err != nil {
		fatalf("cannot read datafiles dir: %v", err)
	}
	for _, name := range names {
		writeFile("data_"+name+".go", tmpl, info{
			Path: filepath.Join(dataDir, name),
		})
	}
}

func writeFile(name string, t *template.Template, data interface{}) {
	w, err := os.Create(name)
	if err != nil {
		fatalf("cannot create output file: %v", err)
	}
	defer w.Close()
	err = t.Execute(w, data)
	if err != nil {
		fatalf("template execute %q: %v", name, err)
	}
}

func fatalf(f string, a ...interface{}) {
	fmt.Fprintf(os.Stderr, "%s\n", fmt.Sprintf(f, a...))
	os.Exit(2)
}
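As a closing illustration of why the generated string literals look the way they do, here is a small standalone demo of the printf "%q" quoting step the template performs; the template variable, the file name and the sample bytes are invented:

package main

import (
	"os"
	"text/template"
)

// demo mirrors, in miniature, the quoting the generator's template performs:
// printf "%q" turns raw table bytes into a Go-escaped string literal.
var demo = template.Must(template.New("demo").Parse(
	`charset.RegisterDataFile({{.Name | printf "%q"}}, ...) // table: {{.Data | printf "%q"}}` + "\n"))

func main() {
	// Hypothetical inputs: a made-up data file name and three raw bytes.
	// Control and non-ASCII bytes come out as \x00, \xe9, etc., which is
	// exactly the shape of the strings.NewReader literals above.
	err := demo.Execute(os.Stdout, struct {
		Name string
		Data []byte
	}{Name: "example.cp", Data: []byte{0x00, 'A', 0xE9}})
	if err != nil {
		panic(err)
	}
}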