matterbridge/vendor/github.com/paulrosania/go-charset/charset/charset.go

// Package charset implements translation between character sets.
// It uses Unicode as the intermediate representation.
// Because it can be large, the character set data is separated
// from the charset package. It can be embedded in the Go
// executable by importing the data package:
//
//	import _ "github.com/paulrosania/go-charset/data"
//
// It can also be made available in a data directory (by setting CharsetDir).
package charset

import (
	"io"
	"sort"
	"strings"
	"unicode/utf8"
)

// Charset holds information about a given character set.
// It is the descriptor type returned by the package-level Info function
// and by the Info method of a Factory.
type Charset struct {
	Name    string   // Canonical name of character set.
	Aliases []string // Known aliases.
	Desc    string   // Description.
	NoFrom  bool     // Not possible to translate from this charset.
	NoTo    bool     // Not possible to translate to this charset.
}

// Translator represents a character set converter.
//
// The Translate method translates the given data and returns the number
// of bytes of data consumed (n), a slice containing the converted data
// (cdata), and any conversion error. The returned slice may be
// overwritten on the next call to Translate, so a caller must finish
// with it (or copy it) before calling Translate again.
//
// If eof is true, the data represents the final bytes of the input.
// The reader and writer in this file pass any bytes beyond n again,
// followed by newly arrived input, on their next call to Translate.
type Translator interface {
	Translate(data []byte, eof bool) (n int, cdata []byte, err error)
}

// A Factory can be used to make character set translators.
// A Factory passed to Register is consulted by the package-level
// Info, Names, TranslatorFrom and TranslatorTo functions.
type Factory interface {
	// TranslatorFrom creates a translator that will translate from the named character
	// set to UTF-8.
	TranslatorFrom(name string) (Translator, error) // Create a Translator from this character set to UTF-8.

	// TranslatorTo creates a translator that will translate from UTF-8 to the named character set.
	TranslatorTo(name string) (Translator, error) // Create a Translator from UTF-8 to this character set.

	// Names returns all the character set names accessible through the factory.
	Names() []string

	// Info returns information on the named character set. It returns nil if the
	// factory doesn't recognise the given name.
	Info(name string) *Charset
}

// factories lists the Factory values consulted, in slice order, by the
// package-level Info, Names, TranslatorFrom and TranslatorTo functions.
// It starts out holding a localFactory and is extended by Register.
var factories = []Factory{localFactory{}}

// Register registers a new Factory which will be consulted when NewReader
// or NewWriter needs a character set translator for a given name.
// Factories are consulted in registration order, after the ones already
// present in the list.
//
// A nil factory is ignored: storing it would make every later lookup
// that reaches it panic on a nil interface method call.
//
// NOTE(review): the factory list is modified here without any
// synchronization; presumably Register is meant to be called during
// program initialization — confirm against callers.
func Register(factory Factory) {
	if factory == nil {
		return
	}
	factories = append(factories, factory)
}

// NewReader returns a new Reader that translates from the named
// character set to UTF-8 as it reads r.
func NewReader(charset string, r io.Reader) (io.Reader, error) {
	tr, err := TranslatorFrom(charset)
	if err != nil {
		return nil, err
	}
	return NewTranslatingReader(r, tr), nil
}

// NewWriter returns a new WriteCloser writing to w.  It converts writes
// of UTF-8 text into writes on w of text in the named character set.
// The Close is necessary to flush any remaining partially translated
// characters to the output.
func NewWriter(charset string, w io.Writer) (io.WriteCloser, error) {
	tr, err := TranslatorTo(charset)
	if err != nil {
		return nil, err
	}
	return NewTranslatingWriter(w, tr), nil
}

// Info asks each registered factory, in order, about the named character
// set and returns the first non-nil answer. It returns nil when no
// factory recognises the name.
func Info(name string) *Charset {
	for i := range factories {
		found := factories[i].Info(name)
		if found == nil {
			continue
		}
		return found
	}
	return nil
}

// Names returns the canonical names of all supported character sets, in
// alphabetical order. The names reported by every registered factory are
// merged; a name reported more than once appears only once in the result.
// The result is nil when no names are available.
func Names() []string {
	seen := make(map[string]bool)
	var names []string
	for _, f := range factories {
		for _, name := range f.Names() {
			if seen[name] {
				continue
			}
			seen[name] = true
			names = append(names, name)
		}
	}
	// Factories make no ordering promise, so sort the merged list to
	// provide the documented order.
	sort.Strings(names)
	return names
}

// TranslatorFrom returns a translator that will translate from
// the named character set to UTF-8.
//
// The registered factories are asked in order and the first one that
// returns a non-nil translator with a nil error wins. A translator that
// comes back together with an error is never returned. If no factory
// succeeds, the translator is nil and the error is the most recent
// non-nil error reported by a factory; it is nil only when no factory
// reported an error at all (for example when none is registered).
func TranslatorFrom(charset string) (Translator, error) {
	var lastErr error
	for _, f := range factories {
		tr, err := f.TranslatorFrom(charset)
		if err != nil {
			lastErr = err
			continue
		}
		if tr != nil {
			return tr, nil
		}
		// (nil, nil): this factory has nothing to offer; keep looking.
	}
	return nil, lastErr
}

// TranslatorTo returns a translator that will translate from UTF-8
// to the named character set.
//
// The registered factories are asked in order and the first one that
// returns a non-nil translator with a nil error wins. A translator that
// comes back together with an error is never returned. If no factory
// succeeds, the translator is nil and the error is the most recent
// non-nil error reported by a factory; it is nil only when no factory
// reported an error at all (for example when none is registered).
func TranslatorTo(charset string) (Translator, error) {
	var lastErr error
	for _, f := range factories {
		tr, err := f.TranslatorTo(charset)
		if err != nil {
			lastErr = err
			continue
		}
		if tr != nil {
			return tr, nil
		}
		// (nil, nil): this factory has nothing to offer; keep looking.
	}
	return nil, lastErr
}

// normalizedChar maps an ASCII upper-case letter to its lower-case form
// and '_' to '-'. Every other rune is returned unchanged.
func normalizedChar(c rune) rune {
	if 'A' <= c && c <= 'Z' {
		return c + ('a' - 'A')
	}
	if c == '_' {
		return '-'
	}
	return c
}

// NormalizedName returns s with all Roman capitals
// mapped to lower case, and '_' mapped to '-'.
// All other characters are passed through normalizedChar unchanged.
func NormalizedName(s string) string {
	normalized := strings.Map(normalizedChar, s)
	return normalized
}

// translatingWriter is the io.WriteCloser implementation returned by
// NewTranslatingWriter. Bytes written to it are passed through tr and
// the converted output is written to w. Input that tr has not yet
// consumed is held in buf until the next Write or Close.
type translatingWriter struct {
	w   io.Writer
	tr  Translator
	buf []byte // unconsumed data from writer.
}

// NewTranslatingWriter wraps w in a WriteCloser that runs every written
// byte through the Translator tr before passing the result on to w.
// The returned value starts with no buffered input.
func NewTranslatingWriter(w io.Writer, tr Translator) io.WriteCloser {
	tw := new(translatingWriter)
	tw.w = w
	tw.tr = tr
	return tw
}

// Write translates data and writes the converted bytes to the underlying
// writer. Input that the Translator does not consume is kept in w.buf and
// passed to the Translator again, ahead of the new data, on the next call
// to Write or Close.
//
// On success it returns len(data), nil, even when part of data is only
// buffered. If the Translator or the underlying writer fails (a short
// write is reported as io.ErrShortWrite), Write returns 0 and the error,
// and the pending buffer is restored to what it held before the call.
func (w *translatingWriter) Write(data []byte) (rn int, rerr error) {
	pending := len(w.buf)
	wdata := data
	if pending > 0 {
		// Earlier calls left unconsumed input behind; present it to the
		// translator together with the new data.
		w.buf = append(w.buf, data...)
		wdata = w.buf
	}
	n, cdata, err := w.tr.Translate(wdata, false)
	if err != nil {
		// Nothing from this call was accepted, so take data out of the
		// pending buffer again.
		w.buf = w.buf[:pending]
		return 0, err
	}
	// Forward whatever the translator produced, even if it consumed no
	// input on this call.
	if len(cdata) > 0 {
		nw, werr := w.w.Write(cdata)
		if werr == nil && nw < len(cdata) {
			werr = io.ErrShortWrite
		}
		if werr != nil {
			w.buf = w.buf[:pending]
			return 0, werr
		}
	}
	// Keep the unconsumed tail for the next call. wdata may alias w.buf;
	// append copies overlapping ranges correctly.
	w.buf = append(w.buf[:0], wdata[n:]...)
	return len(data), nil
}

// Close flushes the input still buffered from earlier Write calls by
// passing it to the Translator with eof set, and writes the converted
// output to the underlying writer. It does not close the underlying
// writer.
//
// It returns the first error reported by the Translator or by the
// underlying writer; a short write is reported as io.ErrShortWrite.
func (w *translatingWriter) Close() error {
	for {
		n, data, err := w.tr.Translate(w.buf, true)
		if err != nil {
			return err
		}
		w.buf = w.buf[n:]
		// If the Translator produces no data
		// at EOF, then assume that it never will.
		if len(data) == 0 {
			break
		}
		nw, err := w.w.Write(data)
		if err != nil {
			return err
		}
		if nw < len(data) {
			return io.ErrShortWrite
		}
		// Everything buffered has been consumed; nothing left to flush.
		if len(w.buf) == 0 {
			break
		}
	}
	return nil
}

// translatingReader is the io.Reader implementation returned by
// NewTranslatingReader. It reads raw bytes from r into rdata, passes
// them through tr, and hands the converted bytes out from cdata.
// Once r reports an error (including io.EOF) it is stored in err and r
// is not read again.
type translatingReader struct {
	r     io.Reader
	tr    Translator
	cdata []byte // unconsumed data from converter.
	rdata []byte // unconverted data from reader.
	err   error  // final error from reader.
}

// NewTranslatingReader wraps r in a Reader that runs everything it reads
// from r through the Translator tr and delivers the converted bytes.
// The returned value starts with empty buffers.
func NewTranslatingReader(r io.Reader, tr Translator) io.Reader {
	reader := new(translatingReader)
	reader.r = r
	reader.tr = tr
	return reader
}

// Read implements io.Reader. It copies converted bytes into buf,
// reading and translating more input from the underlying reader as
// needed.
//
// A zero-length buf returns (0, nil) without touching the underlying
// reader. Once the underlying reader reports an error (including
// io.EOF), the remaining input is translated with eof set, and that
// error is returned after all converted bytes have been delivered.
// A conversion error from the Translator is returned immediately.
func (r *translatingReader) Read(buf []byte) (int, error) {
	if len(buf) == 0 {
		// Nothing can be delivered. Returning here also keeps the
		// (0, nil) result of a zero-length read on the source from
		// being mistaken for EOF by the guard below.
		return 0, nil
	}
	for {
		if len(r.cdata) > 0 {
			// Hand out converted bytes left from the last Translate call.
			n := copy(buf, r.cdata)
			r.cdata = r.cdata[n:]
			return n, nil
		}
		if r.err == nil {
			// Make room for len(buf) more raw bytes after any
			// unconverted tail, then read into that free space.
			r.rdata = ensureCap(r.rdata, len(r.rdata)+len(buf))
			n, err := r.r.Read(r.rdata[len(r.rdata):cap(r.rdata)])
			// Guard against non-compliant Readers.
			if n == 0 && err == nil {
				err = io.EOF
			}
			r.rdata = r.rdata[0 : len(r.rdata)+n]
			r.err = err
		} else if len(r.rdata) == 0 {
			// The source is finished and nothing is left to convert.
			break
		}
		nc, cdata, cvterr := r.tr.Translate(r.rdata, r.err != nil)
		if cvterr != nil {
			return 0, cvterr
		}
		r.cdata = cdata

		// Ensure that we consume all bytes at eof
		// if the converter refuses them.
		if nc == 0 && r.err != nil {
			nc = len(r.rdata)
		}

		// Copy unconsumed data to the start of the rdata buffer.
		r.rdata = r.rdata[0:copy(r.rdata, r.rdata[nc:])]
	}
	return 0, r.err
}

// ensureCap makes sure s can hold at least n bytes without reallocating.
// When cap(s) is already large enough, s itself is returned. Otherwise
// the contents of s are copied into a new slice of the same length whose
// capacity is n (if s had no capacity) or cap(s) grown repeatedly —
// doubling below 1024, by a quarter from there on — until it reaches n.
func ensureCap(s []byte, n int) []byte {
	have := cap(s)
	if have >= n {
		return s
	}
	// logic adapted from appendslice1 in runtime
	want := n
	if have != 0 {
		want = have
		for want < n {
			if want < 1024 {
				want *= 2
			} else {
				want += want / 4
			}
		}
	}
	grown := make([]byte, len(s), want)
	copy(grown, s)
	return grown
}

// appendRune appends the UTF-8 encoding of r to buf and returns the
// extended slice. ensureCap is used to guarantee room for utf8.UTFMax
// more bytes first, so the append itself never reallocates.
func appendRune(buf []byte, r rune) []byte {
	var encoded [utf8.UTFMax]byte
	width := utf8.EncodeRune(encoded[:], r)
	buf = ensureCap(buf, len(buf)+utf8.UTFMax)
	return append(buf, encoded[:width]...)
}
Add go-charset and chardet to vendor 2017-07-07 21:34:05 +00:00			`// The charset package implements translation between character sets.`
			`// It uses Unicode as the intermediate representation.`
			`// Because it can be large, the character set data is separated`
			`// from the charset package. It can be embedded in the Go`
			`// executable by importing the data package:`
			`//`
			`// import _ "github.com/paulrosania/go-charset/data"`
			`//`
			`// It can also made available in a data directory (by settting CharsetDir).`
			`package charset`

			`import (`
			`"io"`
			`"strings"`
			`"unicode/utf8"`
			`)`

			`// Charset holds information about a given character set.`
			`type Charset struct {`
			`Name string // Canonical name of character set.`
			`Aliases []string // Known aliases.`
			`Desc string // Description.`
			`NoFrom bool // Not possible to translate from this charset.`
			`NoTo bool // Not possible to translate to this charset.`
			`}`

			`// Translator represents a character set converter.`
			`// The Translate method translates the given data,`
			`// and returns the number of bytes of data consumed,`
			`// a slice containing the converted data (which may be`
			`// overwritten on the next call to Translate), and any`
			`// conversion error. If eof is true, the data represents`
			`// the final bytes of the input.`
			`type Translator interface {`
			`Translate(data []byte, eof bool) (n int, cdata []byte, err error)`
			`}`

			`// A Factory can be used to make character set translators.`
			`type Factory interface {`
			`// TranslatorFrom creates a translator that will translate from the named character`
			`// set to UTF-8.`
			`TranslatorFrom(name string) (Translator, error) // Create a Translator from this character set to.`

			`// TranslatorTo creates a translator that will translate from UTF-8 to the named character set.`
			`TranslatorTo(name string) (Translator, error) // Create a Translator To this character set.`

			`// Names returns all the character set names accessibile through the factory.`
			`Names() []string`

			`// Info returns information on the named character set. It returns nil if the`
			`// factory doesn't recognise the given name.`
			`Info(name string) *Charset`
			`}`

			`var factories = []Factory{localFactory{}}`

			`// Register registers a new Factory which will be consulted when NewReader`
			`// or NewWriter needs a character set translator for a given name.`
			`func Register(factory Factory) {`
			`factories = append(factories, factory)`
			`}`

			`// NewReader returns a new Reader that translates from the named`
			`// character set to UTF-8 as it reads r.`
			`func NewReader(charset string, r io.Reader) (io.Reader, error) {`
			`tr, err := TranslatorFrom(charset)`
			`if err != nil {`
			`return nil, err`
			`}`
			`return NewTranslatingReader(r, tr), nil`
			`}`

			`// NewWriter returns a new WriteCloser writing to w. It converts writes`
			`// of UTF-8 text into writes on w of text in the named character set.`
			`// The Close is necessary to flush any remaining partially translated`
			`// characters to the output.`
			`func NewWriter(charset string, w io.Writer) (io.WriteCloser, error) {`
			`tr, err := TranslatorTo(charset)`
			`if err != nil {`
			`return nil, err`
			`}`
			`return NewTranslatingWriter(w, tr), nil`
			`}`

			`// Info returns information about a character set, or nil`
			`// if the character set is not found.`
			`func Info(name string) *Charset {`
			`for _, f := range factories {`
			`if info := f.Info(name); info != nil {`
			`return info`
			`}`
			`}`
			`return nil`
			`}`

			`// Names returns the canonical names of all supported character sets, in alphabetical order.`
			`func Names() []string {`
			`// TODO eliminate duplicates`
			`var names []string`
			`for _, f := range factories {`
			`names = append(names, f.Names()...)`
			`}`
			`return names`
			`}`

			`// TranslatorFrom returns a translator that will translate from`
			`// the named character set to UTF-8.`
			`func TranslatorFrom(charset string) (Translator, error) {`
			`var err error`
			`var tr Translator`
			`for _, f := range factories {`
			`tr, err = f.TranslatorFrom(charset)`
			`if err == nil {`
			`break`
			`}`
			`}`
			`if tr == nil {`
			`return nil, err`
			`}`
			`return tr, nil`
			`}`

			`// TranslatorTo returns a translator that will translate from UTF-8`
			`// to the named character set.`
			`func TranslatorTo(charset string) (Translator, error) {`
			`var err error`
			`var tr Translator`
			`for _, f := range factories {`
			`tr, err = f.TranslatorTo(charset)`
			`if err == nil {`
			`break`
			`}`
			`}`
			`if tr == nil {`
			`return nil, err`
			`}`
			`return tr, nil`
			`}`

			`func normalizedChar(c rune) rune {`
			`switch {`
			`case c >= 'A' && c <= 'Z':`
			`c = c - 'A' + 'a'`
			`case c == '_':`
			`c = '-'`
			`}`
			`return c`
			`}`

			`// NormalisedName returns s with all Roman capitals`
			`// mapped to lower case, and '_' mapped to '-'`
			`func NormalizedName(s string) string {`
			`return strings.Map(normalizedChar, s)`
			`}`

			`type translatingWriter struct {`
			`w io.Writer`
			`tr Translator`
			`buf []byte // unconsumed data from writer.`
			`}`

			`// NewTranslatingWriter returns a new WriteCloser writing to w.`
			`// It passes the written bytes through the given Translator.`
			`func NewTranslatingWriter(w io.Writer, tr Translator) io.WriteCloser {`
			`return &translatingWriter{w: w, tr: tr}`
			`}`

			`func (w *translatingWriter) Write(data []byte) (rn int, rerr error) {`
			`wdata := data`
			`if len(w.buf) > 0 {`
			`w.buf = append(w.buf, data...)`
			`wdata = w.buf`
			`}`
			`n, cdata, err := w.tr.Translate(wdata, false)`
			`if err != nil {`
			`// TODO`
			`}`
			`if n > 0 {`
			`_, err = w.w.Write(cdata)`
			`if err != nil {`
			`return 0, err`
			`}`
			`}`
			`w.buf = w.buf[:0]`
			`if n < len(wdata) {`
			`w.buf = append(w.buf, wdata[n:]...)`
			`}`
			`return len(data), nil`
			`}`

			`func (p *translatingWriter) Close() error {`
			`for {`
			`n, data, err := p.tr.Translate(p.buf, true)`
			`p.buf = p.buf[n:]`
			`if err != nil {`
			`// TODO`
			`}`
			`// If the Translator produces no data`
			`// at EOF, then assume that it never will.`
			`if len(data) == 0 {`
			`break`
			`}`
			`n, err = p.w.Write(data)`
			`if err != nil {`
			`return err`
			`}`
			`if n < len(data) {`
			`return io.ErrShortWrite`
			`}`
			`if len(p.buf) == 0 {`
			`break`
			`}`
			`}`
			`return nil`
			`}`

			`type translatingReader struct {`
			`r io.Reader`
			`tr Translator`
			`cdata []byte // unconsumed data from converter.`
			`rdata []byte // unconverted data from reader.`
			`err error // final error from reader.`
			`}`

			`// NewTranslatingReader returns a new Reader that`
			`// translates data using the given Translator as it reads r.`
			`func NewTranslatingReader(r io.Reader, tr Translator) io.Reader {`
			`return &translatingReader{r: r, tr: tr}`
			`}`

			`func (r *translatingReader) Read(buf []byte) (int, error) {`
			`for {`
			`if len(r.cdata) > 0 {`
			`n := copy(buf, r.cdata)`
			`r.cdata = r.cdata[n:]`
			`return n, nil`
			`}`
			`if r.err == nil {`
			`r.rdata = ensureCap(r.rdata, len(r.rdata)+len(buf))`
			`n, err := r.r.Read(r.rdata[len(r.rdata):cap(r.rdata)])`
			`// Guard against non-compliant Readers.`
			`if n == 0 && err == nil {`
			`err = io.EOF`
			`}`
			`r.rdata = r.rdata[0 : len(r.rdata)+n]`
			`r.err = err`
			`} else if len(r.rdata) == 0 {`
			`break`
			`}`
			`nc, cdata, cvterr := r.tr.Translate(r.rdata, r.err != nil)`
			`if cvterr != nil {`
			`// TODO`
			`}`
			`r.cdata = cdata`

			`// Ensure that we consume all bytes at eof`
			`// if the converter refuses them.`
			`if nc == 0 && r.err != nil {`
			`nc = len(r.rdata)`
			`}`

			`// Copy unconsumed data to the start of the rdata buffer.`
			`r.rdata = r.rdata[0:copy(r.rdata, r.rdata[nc:])]`
			`}`
			`return 0, r.err`
			`}`

			`// ensureCap returns s with a capacity of at least n bytes.`
			`// If cap(s) < n, then it returns a new copy of s with the`
			`// required capacity.`
			`func ensureCap(s []byte, n int) []byte {`
			`if n <= cap(s) {`
			`return s`
			`}`
			`// logic adapted from appendslice1 in runtime`
			`m := cap(s)`
			`if m == 0 {`
			`m = n`
			`} else {`
			`for {`
			`if m < 1024 {`
			`m += m`
			`} else {`
			`m += m / 4`
			`}`
			`if m >= n {`
			`break`
			`}`
			`}`
			`}`
			`t := make([]byte, len(s), m)`
			`copy(t, s)`
			`return t`
			`}`

			`func appendRune(buf []byte, r rune) []byte {`
			`n := len(buf)`
			`buf = ensureCap(buf, n+utf8.UTFMax)`
			`nu := utf8.EncodeRune(buf[n:n+utf8.UTFMax], r)`
			`return buf[0 : n+nu]`
			`}`