From f0f801402d95b1f49018fa9af77c341ff4c4de5f Mon Sep 17 00:00:00 2001 From: Wim Date: Sat, 19 Mar 2022 23:14:56 +0100 Subject: [PATCH] Refactor utf-8 conversion (irc) (#1767) --- bridge/irc/charset.go | 32 ++ bridge/irc/handlers.go | 7 +- go.mod | 3 +- go.sum | 2 - vendor/github.com/missdeer/golib/LICENSE | 504 ----------------- .../github.com/missdeer/golib/ic/convutf8.go | 72 --- vendor/github.com/missdeer/golib/ic/ic.go | 31 -- .../x/text/encoding/unicode/override.go | 82 +++ .../x/text/encoding/unicode/unicode.go | 512 ++++++++++++++++++ .../internal/utf8internal/utf8internal.go | 87 +++ vendor/modules.txt | 5 +- 11 files changed, 719 insertions(+), 618 deletions(-) create mode 100644 bridge/irc/charset.go delete mode 100644 vendor/github.com/missdeer/golib/LICENSE delete mode 100644 vendor/github.com/missdeer/golib/ic/convutf8.go delete mode 100644 vendor/github.com/missdeer/golib/ic/ic.go create mode 100644 vendor/golang.org/x/text/encoding/unicode/override.go create mode 100644 vendor/golang.org/x/text/encoding/unicode/unicode.go create mode 100644 vendor/golang.org/x/text/internal/utf8internal/utf8internal.go diff --git a/bridge/irc/charset.go b/bridge/irc/charset.go new file mode 100644 index 00000000..57872ec9 --- /dev/null +++ b/bridge/irc/charset.go @@ -0,0 +1,32 @@ +package birc + +import ( + "golang.org/x/text/encoding" + "golang.org/x/text/encoding/japanese" + "golang.org/x/text/encoding/korean" + "golang.org/x/text/encoding/simplifiedchinese" + "golang.org/x/text/encoding/traditionalchinese" + "golang.org/x/text/encoding/unicode" +) + +var encoders = map[string]encoding.Encoding{ + "utf-8": unicode.UTF8, + "iso-2022-jp": japanese.ISO2022JP, + "big5": traditionalchinese.Big5, + "gbk": simplifiedchinese.GBK, + "euc-kr": korean.EUCKR, + "gb2312": simplifiedchinese.HZGB2312, + "shift-jis": japanese.ShiftJIS, + "euc-jp": japanese.EUCJP, + "gb18030": simplifiedchinese.GB18030, +} + +func toUTF8(from string, input string) string { + enc, ok := encoders[from] + if !ok { + return input + } + + res, _ := enc.NewDecoder().String(input) + return res +} diff --git a/bridge/irc/handlers.go b/bridge/irc/handlers.go index b90fa3af..987df2c5 100644 --- a/bridge/irc/handlers.go +++ b/bridge/irc/handlers.go @@ -11,7 +11,6 @@ import ( "github.com/42wim/matterbridge/bridge/config" "github.com/42wim/matterbridge/bridge/helper" "github.com/lrstanley/girc" - "github.com/missdeer/golib/ic" "github.com/paulrosania/go-charset/charset" "github.com/saintfish/chardet" @@ -24,12 +23,12 @@ func (b *Birc) handleCharset(msg *config.Message) error { if b.GetString("Charset") != "" { switch b.GetString("Charset") { case "gbk", "gb18030", "gb2312", "big5", "euc-kr", "euc-jp", "shift-jis", "iso-2022-jp": - msg.Text = ic.ConvertString("utf-8", b.GetString("Charset"), msg.Text) + msg.Text = toUTF8(b.GetString("Charset"), msg.Text) default: buf := new(bytes.Buffer) w, err := charset.NewWriter(b.GetString("Charset"), buf) if err != nil { - b.Log.Errorf("charset from utf-8 conversion failed: %s", err) + b.Log.Errorf("charset to utf-8 conversion failed: %s", err) return err } fmt.Fprint(w, msg.Text) @@ -227,7 +226,7 @@ func (b *Birc) handlePrivMsg(client *girc.Client, event girc.Event) { } switch mycharset { case "gbk", "gb18030", "gb2312", "big5", "euc-kr", "euc-jp", "shift-jis", "iso-2022-jp": - rmsg.Text = ic.ConvertString("utf-8", b.GetString("Charset"), rmsg.Text) + rmsg.Text = toUTF8(b.GetString("Charset"), rmsg.Text) default: r, err := charset.NewReader(mycharset, strings.NewReader(rmsg.Text)) if err != nil { diff --git a/go.mod b/go.mod index 03e3df85..02c9437f 100644 --- a/go.mod +++ b/go.mod @@ -32,7 +32,6 @@ require ( github.com/mattermost/mattermost-server/v5 v5.39.3 github.com/mattermost/mattermost-server/v6 v6.4.2 github.com/mattn/godown v0.0.1 - github.com/missdeer/golib v1.0.4 github.com/nelsonken/gomf v0.0.0-20180504123937-a9dd2f9deae9 github.com/paulrosania/go-charset v0.0.0-20190326053356-55c9d7a5834c github.com/rs/xid v1.3.0 @@ -49,6 +48,7 @@ require ( github.com/zfjagann/golang-ring v0.0.0-20210116075443-7c86fdb43134 golang.org/x/image v0.0.0-20220302094943-723b81ca9867 golang.org/x/oauth2 v0.0.0-20220309155454-6242fa91716a + golang.org/x/text v0.3.7 gomod.garykim.dev/nc-talk v0.3.0 gopkg.in/olahol/melody.v1 v1.0.0-20170518105555-d52139073376 layeh.com/gumble v0.0.0-20200818122324-146f9205029b @@ -126,7 +126,6 @@ require ( golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd // indirect golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e // indirect golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect - golang.org/x/text v0.3.7 // indirect golang.org/x/time v0.0.0-20201208040808-7e3f01d25324 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/protobuf v1.27.1 // indirect diff --git a/go.sum b/go.sum index 0f90c7fe..d5533da2 100644 --- a/go.sum +++ b/go.sum @@ -1163,8 +1163,6 @@ github.com/minio/minio-go/v7 v7.0.16/go.mod h1:pUV0Pc+hPd1nccgmzQF/EXh48l/Z/yps6 github.com/minio/sha256-simd v0.1.1/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM= github.com/minio/sha256-simd v1.0.0 h1:v1ta+49hkWZyvaKwrQB8elexRqm6Y0aMLjCNsrYxo6g= github.com/minio/sha256-simd v1.0.0/go.mod h1:OuYzVNI5vcoYIAmbIvHPl3N3jUzVedXbKy5RFepssQM= -github.com/missdeer/golib v1.0.4 h1:tM7MJIPffXSmwFCTOCMjL5C7JsT5SQ+OmZwzssZQOa8= -github.com/missdeer/golib v1.0.4/go.mod h1:mPN/UcszFq0GxKfQsZI3aFOiRjnzXCBZ392od3guGEY= github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible/go.mod h1:8AuVvqP/mXw1px98n46wfvcGfQ4ci2FwoAjKYxuo3Z4= github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI= diff --git a/vendor/github.com/missdeer/golib/LICENSE b/vendor/github.com/missdeer/golib/LICENSE deleted file mode 100644 index 8000a6fa..00000000 --- a/vendor/github.com/missdeer/golib/LICENSE +++ /dev/null @@ -1,504 +0,0 @@ - GNU LESSER GENERAL PUBLIC LICENSE - Version 2.1, February 1999 - - Copyright (C) 1991, 1999 Free Software Foundation, Inc. - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - -[This is the first released version of the Lesser GPL. It also counts - as the successor of the GNU Library Public License, version 2, hence - the version number 2.1.] - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -Licenses are intended to guarantee your freedom to share and change -free software--to make sure the software is free for all its users. - - This license, the Lesser General Public License, applies to some -specially designated software packages--typically libraries--of the -Free Software Foundation and other authors who decide to use it. You -can use it too, but we suggest you first think carefully about whether -this license or the ordinary General Public License is the better -strategy to use in any particular case, based on the explanations below. - - When we speak of free software, we are referring to freedom of use, -not price. Our General Public Licenses are designed to make sure that -you have the freedom to distribute copies of free software (and charge -for this service if you wish); that you receive source code or can get -it if you want it; that you can change the software and use pieces of -it in new free programs; and that you are informed that you can do -these things. - - To protect your rights, we need to make restrictions that forbid -distributors to deny you these rights or to ask you to surrender these -rights. These restrictions translate to certain responsibilities for -you if you distribute copies of the library or if you modify it. - - For example, if you distribute copies of the library, whether gratis -or for a fee, you must give the recipients all the rights that we gave -you. You must make sure that they, too, receive or can get the source -code. If you link other code with the library, you must provide -complete object files to the recipients, so that they can relink them -with the library after making changes to the library and recompiling -it. And you must show them these terms so they know their rights. - - We protect your rights with a two-step method: (1) we copyright the -library, and (2) we offer you this license, which gives you legal -permission to copy, distribute and/or modify the library. - - To protect each distributor, we want to make it very clear that -there is no warranty for the free library. Also, if the library is -modified by someone else and passed on, the recipients should know -that what they have is not the original version, so that the original -author's reputation will not be affected by problems that might be -introduced by others. - - Finally, software patents pose a constant threat to the existence of -any free program. We wish to make sure that a company cannot -effectively restrict the users of a free program by obtaining a -restrictive license from a patent holder. Therefore, we insist that -any patent license obtained for a version of the library must be -consistent with the full freedom of use specified in this license. - - Most GNU software, including some libraries, is covered by the -ordinary GNU General Public License. This license, the GNU Lesser -General Public License, applies to certain designated libraries, and -is quite different from the ordinary General Public License. We use -this license for certain libraries in order to permit linking those -libraries into non-free programs. - - When a program is linked with a library, whether statically or using -a shared library, the combination of the two is legally speaking a -combined work, a derivative of the original library. The ordinary -General Public License therefore permits such linking only if the -entire combination fits its criteria of freedom. The Lesser General -Public License permits more lax criteria for linking other code with -the library. - - We call this license the "Lesser" General Public License because it -does Less to protect the user's freedom than the ordinary General -Public License. It also provides other free software developers Less -of an advantage over competing non-free programs. These disadvantages -are the reason we use the ordinary General Public License for many -libraries. However, the Lesser license provides advantages in certain -special circumstances. - - For example, on rare occasions, there may be a special need to -encourage the widest possible use of a certain library, so that it becomes -a de-facto standard. To achieve this, non-free programs must be -allowed to use the library. A more frequent case is that a free -library does the same job as widely used non-free libraries. In this -case, there is little to gain by limiting the free library to free -software only, so we use the Lesser General Public License. - - In other cases, permission to use a particular library in non-free -programs enables a greater number of people to use a large body of -free software. For example, permission to use the GNU C Library in -non-free programs enables many more people to use the whole GNU -operating system, as well as its variant, the GNU/Linux operating -system. - - Although the Lesser General Public License is Less protective of the -users' freedom, it does ensure that the user of a program that is -linked with the Library has the freedom and the wherewithal to run -that program using a modified version of the Library. - - The precise terms and conditions for copying, distribution and -modification follow. Pay close attention to the difference between a -"work based on the library" and a "work that uses the library". The -former contains code derived from the library, whereas the latter must -be combined with the library in order to run. - - GNU LESSER GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License Agreement applies to any software library or other -program which contains a notice placed by the copyright holder or -other authorized party saying it may be distributed under the terms of -this Lesser General Public License (also called "this License"). -Each licensee is addressed as "you". - - A "library" means a collection of software functions and/or data -prepared so as to be conveniently linked with application programs -(which use some of those functions and data) to form executables. - - The "Library", below, refers to any such software library or work -which has been distributed under these terms. A "work based on the -Library" means either the Library or any derivative work under -copyright law: that is to say, a work containing the Library or a -portion of it, either verbatim or with modifications and/or translated -straightforwardly into another language. (Hereinafter, translation is -included without limitation in the term "modification".) - - "Source code" for a work means the preferred form of the work for -making modifications to it. For a library, complete source code means -all the source code for all modules it contains, plus any associated -interface definition files, plus the scripts used to control compilation -and installation of the library. - - Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running a program using the Library is not restricted, and output from -such a program is covered only if its contents constitute a work based -on the Library (independent of the use of the Library in a tool for -writing it). Whether that is true depends on what the Library does -and what the program that uses the Library does. - - 1. You may copy and distribute verbatim copies of the Library's -complete source code as you receive it, in any medium, provided that -you conspicuously and appropriately publish on each copy an -appropriate copyright notice and disclaimer of warranty; keep intact -all the notices that refer to this License and to the absence of any -warranty; and distribute a copy of this License along with the -Library. - - You may charge a fee for the physical act of transferring a copy, -and you may at your option offer warranty protection in exchange for a -fee. - - 2. You may modify your copy or copies of the Library or any portion -of it, thus forming a work based on the Library, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) The modified work must itself be a software library. - - b) You must cause the files modified to carry prominent notices - stating that you changed the files and the date of any change. - - c) You must cause the whole of the work to be licensed at no - charge to all third parties under the terms of this License. - - d) If a facility in the modified Library refers to a function or a - table of data to be supplied by an application program that uses - the facility, other than as an argument passed when the facility - is invoked, then you must make a good faith effort to ensure that, - in the event an application does not supply such function or - table, the facility still operates, and performs whatever part of - its purpose remains meaningful. - - (For example, a function in a library to compute square roots has - a purpose that is entirely well-defined independent of the - application. Therefore, Subsection 2d requires that any - application-supplied function or table used by this function must - be optional: if the application does not supply it, the square - root function must still compute square roots.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Library, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Library, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote -it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Library. - -In addition, mere aggregation of another work not based on the Library -with the Library (or with a work based on the Library) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may opt to apply the terms of the ordinary GNU General Public -License instead of this License to a given copy of the Library. To do -this, you must alter all the notices that refer to this License, so -that they refer to the ordinary GNU General Public License, version 2, -instead of to this License. (If a newer version than version 2 of the -ordinary GNU General Public License has appeared, then you can specify -that version instead if you wish.) Do not make any other change in -these notices. - - Once this change is made in a given copy, it is irreversible for -that copy, so the ordinary GNU General Public License applies to all -subsequent copies and derivative works made from that copy. - - This option is useful when you wish to copy part of the code of -the Library into a program that is not a library. - - 4. You may copy and distribute the Library (or a portion or -derivative of it, under Section 2) in object code or executable form -under the terms of Sections 1 and 2 above provided that you accompany -it with the complete corresponding machine-readable source code, which -must be distributed under the terms of Sections 1 and 2 above on a -medium customarily used for software interchange. - - If distribution of object code is made by offering access to copy -from a designated place, then offering equivalent access to copy the -source code from the same place satisfies the requirement to -distribute the source code, even though third parties are not -compelled to copy the source along with the object code. - - 5. A program that contains no derivative of any portion of the -Library, but is designed to work with the Library by being compiled or -linked with it, is called a "work that uses the Library". Such a -work, in isolation, is not a derivative work of the Library, and -therefore falls outside the scope of this License. - - However, linking a "work that uses the Library" with the Library -creates an executable that is a derivative of the Library (because it -contains portions of the Library), rather than a "work that uses the -library". The executable is therefore covered by this License. -Section 6 states terms for distribution of such executables. - - When a "work that uses the Library" uses material from a header file -that is part of the Library, the object code for the work may be a -derivative work of the Library even though the source code is not. -Whether this is true is especially significant if the work can be -linked without the Library, or if the work is itself a library. The -threshold for this to be true is not precisely defined by law. - - If such an object file uses only numerical parameters, data -structure layouts and accessors, and small macros and small inline -functions (ten lines or less in length), then the use of the object -file is unrestricted, regardless of whether it is legally a derivative -work. (Executables containing this object code plus portions of the -Library will still fall under Section 6.) - - Otherwise, if the work is a derivative of the Library, you may -distribute the object code for the work under the terms of Section 6. -Any executables containing that work also fall under Section 6, -whether or not they are linked directly with the Library itself. - - 6. As an exception to the Sections above, you may also combine or -link a "work that uses the Library" with the Library to produce a -work containing portions of the Library, and distribute that work -under terms of your choice, provided that the terms permit -modification of the work for the customer's own use and reverse -engineering for debugging such modifications. - - You must give prominent notice with each copy of the work that the -Library is used in it and that the Library and its use are covered by -this License. You must supply a copy of this License. If the work -during execution displays copyright notices, you must include the -copyright notice for the Library among them, as well as a reference -directing the user to the copy of this License. Also, you must do one -of these things: - - a) Accompany the work with the complete corresponding - machine-readable source code for the Library including whatever - changes were used in the work (which must be distributed under - Sections 1 and 2 above); and, if the work is an executable linked - with the Library, with the complete machine-readable "work that - uses the Library", as object code and/or source code, so that the - user can modify the Library and then relink to produce a modified - executable containing the modified Library. (It is understood - that the user who changes the contents of definitions files in the - Library will not necessarily be able to recompile the application - to use the modified definitions.) - - b) Use a suitable shared library mechanism for linking with the - Library. A suitable mechanism is one that (1) uses at run time a - copy of the library already present on the user's computer system, - rather than copying library functions into the executable, and (2) - will operate properly with a modified version of the library, if - the user installs one, as long as the modified version is - interface-compatible with the version that the work was made with. - - c) Accompany the work with a written offer, valid for at - least three years, to give the same user the materials - specified in Subsection 6a, above, for a charge no more - than the cost of performing this distribution. - - d) If distribution of the work is made by offering access to copy - from a designated place, offer equivalent access to copy the above - specified materials from the same place. - - e) Verify that the user has already received a copy of these - materials or that you have already sent this user a copy. - - For an executable, the required form of the "work that uses the -Library" must include any data and utility programs needed for -reproducing the executable from it. However, as a special exception, -the materials to be distributed need not include anything that is -normally distributed (in either source or binary form) with the major -components (compiler, kernel, and so on) of the operating system on -which the executable runs, unless that component itself accompanies -the executable. - - It may happen that this requirement contradicts the license -restrictions of other proprietary libraries that do not normally -accompany the operating system. Such a contradiction means you cannot -use both them and the Library together in an executable that you -distribute. - - 7. You may place library facilities that are a work based on the -Library side-by-side in a single library together with other library -facilities not covered by this License, and distribute such a combined -library, provided that the separate distribution of the work based on -the Library and of the other library facilities is otherwise -permitted, and provided that you do these two things: - - a) Accompany the combined library with a copy of the same work - based on the Library, uncombined with any other library - facilities. This must be distributed under the terms of the - Sections above. - - b) Give prominent notice with the combined library of the fact - that part of it is a work based on the Library, and explaining - where to find the accompanying uncombined form of the same work. - - 8. You may not copy, modify, sublicense, link with, or distribute -the Library except as expressly provided under this License. Any -attempt otherwise to copy, modify, sublicense, link with, or -distribute the Library is void, and will automatically terminate your -rights under this License. However, parties who have received copies, -or rights, from you under this License will not have their licenses -terminated so long as such parties remain in full compliance. - - 9. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Library or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Library (or any work based on the -Library), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Library or works based on it. - - 10. Each time you redistribute the Library (or any work based on the -Library), the recipient automatically receives a license from the -original licensor to copy, distribute, link with or modify the Library -subject to these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties with -this License. - - 11. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Library at all. For example, if a patent -license would not permit royalty-free redistribution of the Library by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Library. - -If any portion of this section is held invalid or unenforceable under any -particular circumstance, the balance of the section is intended to apply, -and the section as a whole is intended to apply in other circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 12. If the distribution and/or use of the Library is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Library under this License may add -an explicit geographical distribution limitation excluding those countries, -so that distribution is permitted only in or among countries not thus -excluded. In such case, this License incorporates the limitation as if -written in the body of this License. - - 13. The Free Software Foundation may publish revised and/or new -versions of the Lesser General Public License from time to time. -Such new versions will be similar in spirit to the present version, -but may differ in detail to address new problems or concerns. - -Each version is given a distinguishing version number. If the Library -specifies a version number of this License which applies to it and -"any later version", you have the option of following the terms and -conditions either of that version or of any later version published by -the Free Software Foundation. If the Library does not specify a -license version number, you may choose any version ever published by -the Free Software Foundation. - - 14. If you wish to incorporate parts of the Library into other free -programs whose distribution conditions are incompatible with these, -write to the author to ask for permission. For software which is -copyrighted by the Free Software Foundation, write to the Free -Software Foundation; we sometimes make exceptions for this. Our -decision will be guided by the two goals of preserving the free status -of all derivatives of our free software and of promoting the sharing -and reuse of software generally. - - NO WARRANTY - - 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO -WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. -EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR -OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY -KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE -LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME -THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN -WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY -AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU -FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR -CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE -LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING -RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A -FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF -SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH -DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Libraries - - If you develop a new library, and you want it to be of the greatest -possible use to the public, we recommend making it free software that -everyone can redistribute and change. You can do so by permitting -redistribution under these terms (or, alternatively, under the terms of the -ordinary General Public License). - - To apply these terms, attach the following notices to the library. It is -safest to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least the -"copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 - USA - -Also add information on how to contact you by electronic and paper mail. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the library, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the - library `Frob' (a library for tweaking knobs) written by James Random - Hacker. - - , 1 April 1990 - Ty Coon, President of Vice - -That's all there is to it! diff --git a/vendor/github.com/missdeer/golib/ic/convutf8.go b/vendor/github.com/missdeer/golib/ic/convutf8.go deleted file mode 100644 index b4851497..00000000 --- a/vendor/github.com/missdeer/golib/ic/convutf8.go +++ /dev/null @@ -1,72 +0,0 @@ -// Package ic convert text between CJK and UTF-8 in pure Go way -package ic - -import ( - "bytes" - "errors" - "io/ioutil" - "strings" - - "golang.org/x/text/encoding" - "golang.org/x/text/encoding/japanese" - "golang.org/x/text/encoding/korean" - "golang.org/x/text/encoding/simplifiedchinese" - "golang.org/x/text/encoding/traditionalchinese" - "golang.org/x/text/transform" -) - -var ( - transformers = map[string]encoding.Encoding{ - "gbk": simplifiedchinese.GBK, - "cp936": simplifiedchinese.GBK, - "windows-936": simplifiedchinese.GBK, - "gb18030": simplifiedchinese.GB18030, - "gb2312": simplifiedchinese.HZGB2312, - "big5": traditionalchinese.Big5, - "big-5": traditionalchinese.Big5, - "cp950": traditionalchinese.Big5, - "euc-kr": korean.EUCKR, - "euckr": korean.EUCKR, - "cp949": korean.EUCKR, - "euc-jp": japanese.EUCJP, - "eucjp": japanese.EUCJP, - "shift-jis": japanese.ShiftJIS, - "iso-2022-jp": japanese.ISO2022JP, - "cp932": japanese.ISO2022JP, - "windows-31j": japanese.ISO2022JP, - } -) - -// ToUTF8 convert from CJK encoding to UTF-8 -func ToUTF8(from string, s []byte) ([]byte, error) { - var reader *transform.Reader - - transformer, ok := transformers[strings.ToLower(from)] - if !ok { - return s, errors.New("Unsupported encoding " + from) - } - reader = transform.NewReader(bytes.NewReader(s), transformer.NewDecoder()) - - d, e := ioutil.ReadAll(reader) - if e != nil { - return nil, e - } - return d, nil -} - -// FromUTF8 convert from UTF-8 encoding to CJK encoding -func FromUTF8(to string, s []byte) ([]byte, error) { - var reader *transform.Reader - - transformer, ok := transformers[strings.ToLower(to)] - if !ok { - return s, errors.New("Unsupported encoding " + to) - } - reader = transform.NewReader(bytes.NewReader(s), transformer.NewEncoder()) - - d, e := ioutil.ReadAll(reader) - if e != nil { - return nil, e - } - return d, nil -} diff --git a/vendor/github.com/missdeer/golib/ic/ic.go b/vendor/github.com/missdeer/golib/ic/ic.go deleted file mode 100644 index 9e414e36..00000000 --- a/vendor/github.com/missdeer/golib/ic/ic.go +++ /dev/null @@ -1,31 +0,0 @@ -package ic - -import "log" - -// Convert convert bytes from CJK or UTF-8 to UTF-8 or CJK -func Convert(from string, to string, src []byte) []byte { - if to == "utf-8" { - out, e := ToUTF8(from, src) - if e == nil { - return out - } - log.Printf("converting from %s to UTF-8 failed: %v", from, e) - return src - } - - if from == "utf-8" { - out, e := FromUTF8(to, src) - if e == nil { - return out - } - log.Printf("converting from UTF-8 to %s failed: %v", to, e) - return src - } - log.Println("only converting between CJK encodings and UTF-8 is supported") - return src -} - -// ConvertString convert string from CJK or UTF-8 to UTF-8 or CJK -func ConvertString(from string, to string, src string) string { - return string(Convert(from, to, []byte(src))) -} diff --git a/vendor/golang.org/x/text/encoding/unicode/override.go b/vendor/golang.org/x/text/encoding/unicode/override.go new file mode 100644 index 00000000..35d62fcc --- /dev/null +++ b/vendor/golang.org/x/text/encoding/unicode/override.go @@ -0,0 +1,82 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unicode + +import ( + "golang.org/x/text/transform" +) + +// BOMOverride returns a new decoder transformer that is identical to fallback, +// except that the presence of a Byte Order Mark at the start of the input +// causes it to switch to the corresponding Unicode decoding. It will only +// consider BOMs for UTF-8, UTF-16BE, and UTF-16LE. +// +// This differs from using ExpectBOM by allowing a BOM to switch to UTF-8, not +// just UTF-16 variants, and allowing falling back to any encoding scheme. +// +// This technique is recommended by the W3C for use in HTML 5: "For +// compatibility with deployed content, the byte order mark (also known as BOM) +// is considered more authoritative than anything else." +// http://www.w3.org/TR/encoding/#specification-hooks +// +// Using BOMOverride is mostly intended for use cases where the first characters +// of a fallback encoding are known to not be a BOM, for example, for valid HTML +// and most encodings. +func BOMOverride(fallback transform.Transformer) transform.Transformer { + // TODO: possibly allow a variadic argument of unicode encodings to allow + // specifying details of which fallbacks are supported as well as + // specifying the details of the implementations. This would also allow for + // support for UTF-32, which should not be supported by default. + return &bomOverride{fallback: fallback} +} + +type bomOverride struct { + fallback transform.Transformer + current transform.Transformer +} + +func (d *bomOverride) Reset() { + d.current = nil + d.fallback.Reset() +} + +var ( + // TODO: we could use decode functions here, instead of allocating a new + // decoder on every NewDecoder as IgnoreBOM decoders can be stateless. + utf16le = UTF16(LittleEndian, IgnoreBOM) + utf16be = UTF16(BigEndian, IgnoreBOM) +) + +const utf8BOM = "\ufeff" + +func (d *bomOverride) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + if d.current != nil { + return d.current.Transform(dst, src, atEOF) + } + if len(src) < 3 && !atEOF { + return 0, 0, transform.ErrShortSrc + } + d.current = d.fallback + bomSize := 0 + if len(src) >= 2 { + if src[0] == 0xFF && src[1] == 0xFE { + d.current = utf16le.NewDecoder() + bomSize = 2 + } else if src[0] == 0xFE && src[1] == 0xFF { + d.current = utf16be.NewDecoder() + bomSize = 2 + } else if len(src) >= 3 && + src[0] == utf8BOM[0] && + src[1] == utf8BOM[1] && + src[2] == utf8BOM[2] { + d.current = transform.Nop + bomSize = 3 + } + } + if bomSize < len(src) { + nDst, nSrc, err = d.current.Transform(dst, src[bomSize:], atEOF) + } + return nDst, nSrc + bomSize, err +} diff --git a/vendor/golang.org/x/text/encoding/unicode/unicode.go b/vendor/golang.org/x/text/encoding/unicode/unicode.go new file mode 100644 index 00000000..dd99ad14 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/unicode/unicode.go @@ -0,0 +1,512 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package unicode provides Unicode encodings such as UTF-16. +package unicode // import "golang.org/x/text/encoding/unicode" + +import ( + "bytes" + "errors" + "unicode/utf16" + "unicode/utf8" + + "golang.org/x/text/encoding" + "golang.org/x/text/encoding/internal" + "golang.org/x/text/encoding/internal/identifier" + "golang.org/x/text/internal/utf8internal" + "golang.org/x/text/runes" + "golang.org/x/text/transform" +) + +// TODO: I think the Transformers really should return errors on unmatched +// surrogate pairs and odd numbers of bytes. This is not required by RFC 2781, +// which leaves it open, but is suggested by WhatWG. It will allow for all error +// modes as defined by WhatWG: fatal, HTML and Replacement. This would require +// the introduction of some kind of error type for conveying the erroneous code +// point. + +// UTF8 is the UTF-8 encoding. It neither removes nor adds byte order marks. +var UTF8 encoding.Encoding = utf8enc + +// UTF8BOM is an UTF-8 encoding where the decoder strips a leading byte order +// mark while the encoder adds one. +// +// Some editors add a byte order mark as a signature to UTF-8 files. Although +// the byte order mark is not useful for detecting byte order in UTF-8, it is +// sometimes used as a convention to mark UTF-8-encoded files. This relies on +// the observation that the UTF-8 byte order mark is either an illegal or at +// least very unlikely sequence in any other character encoding. +var UTF8BOM encoding.Encoding = utf8bomEncoding{} + +type utf8bomEncoding struct{} + +func (utf8bomEncoding) String() string { + return "UTF-8-BOM" +} + +func (utf8bomEncoding) ID() (identifier.MIB, string) { + return identifier.Unofficial, "x-utf8bom" +} + +func (utf8bomEncoding) NewEncoder() *encoding.Encoder { + return &encoding.Encoder{ + Transformer: &utf8bomEncoder{t: runes.ReplaceIllFormed()}, + } +} + +func (utf8bomEncoding) NewDecoder() *encoding.Decoder { + return &encoding.Decoder{Transformer: &utf8bomDecoder{}} +} + +var utf8enc = &internal.Encoding{ + &internal.SimpleEncoding{utf8Decoder{}, runes.ReplaceIllFormed()}, + "UTF-8", + identifier.UTF8, +} + +type utf8bomDecoder struct { + checked bool +} + +func (t *utf8bomDecoder) Reset() { + t.checked = false +} + +func (t *utf8bomDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + if !t.checked { + if !atEOF && len(src) < len(utf8BOM) { + if len(src) == 0 { + return 0, 0, nil + } + return 0, 0, transform.ErrShortSrc + } + if bytes.HasPrefix(src, []byte(utf8BOM)) { + nSrc += len(utf8BOM) + src = src[len(utf8BOM):] + } + t.checked = true + } + nDst, n, err := utf8Decoder.Transform(utf8Decoder{}, dst[nDst:], src, atEOF) + nSrc += n + return nDst, nSrc, err +} + +type utf8bomEncoder struct { + written bool + t transform.Transformer +} + +func (t *utf8bomEncoder) Reset() { + t.written = false + t.t.Reset() +} + +func (t *utf8bomEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + if !t.written { + if len(dst) < len(utf8BOM) { + return nDst, 0, transform.ErrShortDst + } + nDst = copy(dst, utf8BOM) + t.written = true + } + n, nSrc, err := utf8Decoder.Transform(utf8Decoder{}, dst[nDst:], src, atEOF) + nDst += n + return nDst, nSrc, err +} + +type utf8Decoder struct{ transform.NopResetter } + +func (utf8Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + var pSrc int // point from which to start copy in src + var accept utf8internal.AcceptRange + + // The decoder can only make the input larger, not smaller. + n := len(src) + if len(dst) < n { + err = transform.ErrShortDst + n = len(dst) + atEOF = false + } + for nSrc < n { + c := src[nSrc] + if c < utf8.RuneSelf { + nSrc++ + continue + } + first := utf8internal.First[c] + size := int(first & utf8internal.SizeMask) + if first == utf8internal.FirstInvalid { + goto handleInvalid // invalid starter byte + } + accept = utf8internal.AcceptRanges[first>>utf8internal.AcceptShift] + if nSrc+size > n { + if !atEOF { + // We may stop earlier than necessary here if the short sequence + // has invalid bytes. Not checking for this simplifies the code + // and may avoid duplicate computations in certain conditions. + if err == nil { + err = transform.ErrShortSrc + } + break + } + // Determine the maximal subpart of an ill-formed subsequence. + switch { + case nSrc+1 >= n || src[nSrc+1] < accept.Lo || accept.Hi < src[nSrc+1]: + size = 1 + case nSrc+2 >= n || src[nSrc+2] < utf8internal.LoCB || utf8internal.HiCB < src[nSrc+2]: + size = 2 + default: + size = 3 // As we are short, the maximum is 3. + } + goto handleInvalid + } + if c = src[nSrc+1]; c < accept.Lo || accept.Hi < c { + size = 1 + goto handleInvalid // invalid continuation byte + } else if size == 2 { + } else if c = src[nSrc+2]; c < utf8internal.LoCB || utf8internal.HiCB < c { + size = 2 + goto handleInvalid // invalid continuation byte + } else if size == 3 { + } else if c = src[nSrc+3]; c < utf8internal.LoCB || utf8internal.HiCB < c { + size = 3 + goto handleInvalid // invalid continuation byte + } + nSrc += size + continue + + handleInvalid: + // Copy the scanned input so far. + nDst += copy(dst[nDst:], src[pSrc:nSrc]) + + // Append RuneError to the destination. + const runeError = "\ufffd" + if nDst+len(runeError) > len(dst) { + return nDst, nSrc, transform.ErrShortDst + } + nDst += copy(dst[nDst:], runeError) + + // Skip the maximal subpart of an ill-formed subsequence according to + // the W3C standard way instead of the Go way. This Transform is + // probably the only place in the text repo where it is warranted. + nSrc += size + pSrc = nSrc + + // Recompute the maximum source length. + if sz := len(dst) - nDst; sz < len(src)-nSrc { + err = transform.ErrShortDst + n = nSrc + sz + atEOF = false + } + } + return nDst + copy(dst[nDst:], src[pSrc:nSrc]), nSrc, err +} + +// UTF16 returns a UTF-16 Encoding for the given default endianness and byte +// order mark (BOM) policy. +// +// When decoding from UTF-16 to UTF-8, if the BOMPolicy is IgnoreBOM then +// neither BOMs U+FEFF nor noncharacters U+FFFE in the input stream will affect +// the endianness used for decoding, and will instead be output as their +// standard UTF-8 encodings: "\xef\xbb\xbf" and "\xef\xbf\xbe". If the BOMPolicy +// is UseBOM or ExpectBOM a staring BOM is not written to the UTF-8 output. +// Instead, it overrides the default endianness e for the remainder of the +// transformation. Any subsequent BOMs U+FEFF or noncharacters U+FFFE will not +// affect the endianness used, and will instead be output as their standard +// UTF-8 encodings. For UseBOM, if there is no starting BOM, it will proceed +// with the default Endianness. For ExpectBOM, in that case, the transformation +// will return early with an ErrMissingBOM error. +// +// When encoding from UTF-8 to UTF-16, a BOM will be inserted at the start of +// the output if the BOMPolicy is UseBOM or ExpectBOM. Otherwise, a BOM will not +// be inserted. The UTF-8 input does not need to contain a BOM. +// +// There is no concept of a 'native' endianness. If the UTF-16 data is produced +// and consumed in a greater context that implies a certain endianness, use +// IgnoreBOM. Otherwise, use ExpectBOM and always produce and consume a BOM. +// +// In the language of https://www.unicode.org/faq/utf_bom.html#bom10, IgnoreBOM +// corresponds to "Where the precise type of the data stream is known... the +// BOM should not be used" and ExpectBOM corresponds to "A particular +// protocol... may require use of the BOM". +func UTF16(e Endianness, b BOMPolicy) encoding.Encoding { + return utf16Encoding{config{e, b}, mibValue[e][b&bomMask]} +} + +// mibValue maps Endianness and BOMPolicy settings to MIB constants. Note that +// some configurations map to the same MIB identifier. RFC 2781 has requirements +// and recommendations. Some of the "configurations" are merely recommendations, +// so multiple configurations could match. +var mibValue = map[Endianness][numBOMValues]identifier.MIB{ + BigEndian: [numBOMValues]identifier.MIB{ + IgnoreBOM: identifier.UTF16BE, + UseBOM: identifier.UTF16, // BigEnding default is preferred by RFC 2781. + // TODO: acceptBOM | strictBOM would map to UTF16BE as well. + }, + LittleEndian: [numBOMValues]identifier.MIB{ + IgnoreBOM: identifier.UTF16LE, + UseBOM: identifier.UTF16, // LittleEndian default is allowed and preferred on Windows. + // TODO: acceptBOM | strictBOM would map to UTF16LE as well. + }, + // ExpectBOM is not widely used and has no valid MIB identifier. +} + +// All lists a configuration for each IANA-defined UTF-16 variant. +var All = []encoding.Encoding{ + UTF8, + UTF16(BigEndian, UseBOM), + UTF16(BigEndian, IgnoreBOM), + UTF16(LittleEndian, IgnoreBOM), +} + +// BOMPolicy is a UTF-16 encoding's byte order mark policy. +type BOMPolicy uint8 + +const ( + writeBOM BOMPolicy = 0x01 + acceptBOM BOMPolicy = 0x02 + requireBOM BOMPolicy = 0x04 + bomMask BOMPolicy = 0x07 + + // HACK: numBOMValues == 8 triggers a bug in the 1.4 compiler (cannot have a + // map of an array of length 8 of a type that is also used as a key or value + // in another map). See golang.org/issue/11354. + // TODO: consider changing this value back to 8 if the use of 1.4.* has + // been minimized. + numBOMValues = 8 + 1 + + // IgnoreBOM means to ignore any byte order marks. + IgnoreBOM BOMPolicy = 0 + // Common and RFC 2781-compliant interpretation for UTF-16BE/LE. + + // UseBOM means that the UTF-16 form may start with a byte order mark, which + // will be used to override the default encoding. + UseBOM BOMPolicy = writeBOM | acceptBOM + // Common and RFC 2781-compliant interpretation for UTF-16. + + // ExpectBOM means that the UTF-16 form must start with a byte order mark, + // which will be used to override the default encoding. + ExpectBOM BOMPolicy = writeBOM | acceptBOM | requireBOM + // Used in Java as Unicode (not to be confused with Java's UTF-16) and + // ICU's UTF-16,version=1. Not compliant with RFC 2781. + + // TODO (maybe): strictBOM: BOM must match Endianness. This would allow: + // - UTF-16(B|L)E,version=1: writeBOM | acceptBOM | requireBOM | strictBOM + // (UnicodeBig and UnicodeLittle in Java) + // - RFC 2781-compliant, but less common interpretation for UTF-16(B|L)E: + // acceptBOM | strictBOM (e.g. assigned to CheckBOM). + // This addition would be consistent with supporting ExpectBOM. +) + +// Endianness is a UTF-16 encoding's default endianness. +type Endianness bool + +const ( + // BigEndian is UTF-16BE. + BigEndian Endianness = false + // LittleEndian is UTF-16LE. + LittleEndian Endianness = true +) + +// ErrMissingBOM means that decoding UTF-16 input with ExpectBOM did not find a +// starting byte order mark. +var ErrMissingBOM = errors.New("encoding: missing byte order mark") + +type utf16Encoding struct { + config + mib identifier.MIB +} + +type config struct { + endianness Endianness + bomPolicy BOMPolicy +} + +func (u utf16Encoding) NewDecoder() *encoding.Decoder { + return &encoding.Decoder{Transformer: &utf16Decoder{ + initial: u.config, + current: u.config, + }} +} + +func (u utf16Encoding) NewEncoder() *encoding.Encoder { + return &encoding.Encoder{Transformer: &utf16Encoder{ + endianness: u.endianness, + initialBOMPolicy: u.bomPolicy, + currentBOMPolicy: u.bomPolicy, + }} +} + +func (u utf16Encoding) ID() (mib identifier.MIB, other string) { + return u.mib, "" +} + +func (u utf16Encoding) String() string { + e, b := "B", "" + if u.endianness == LittleEndian { + e = "L" + } + switch u.bomPolicy { + case ExpectBOM: + b = "Expect" + case UseBOM: + b = "Use" + case IgnoreBOM: + b = "Ignore" + } + return "UTF-16" + e + "E (" + b + " BOM)" +} + +type utf16Decoder struct { + initial config + current config +} + +func (u *utf16Decoder) Reset() { + u.current = u.initial +} + +func (u *utf16Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + if len(src) < 2 && atEOF && u.current.bomPolicy&requireBOM != 0 { + return 0, 0, ErrMissingBOM + } + if len(src) == 0 { + return 0, 0, nil + } + if len(src) >= 2 && u.current.bomPolicy&acceptBOM != 0 { + switch { + case src[0] == 0xfe && src[1] == 0xff: + u.current.endianness = BigEndian + nSrc = 2 + case src[0] == 0xff && src[1] == 0xfe: + u.current.endianness = LittleEndian + nSrc = 2 + default: + if u.current.bomPolicy&requireBOM != 0 { + return 0, 0, ErrMissingBOM + } + } + u.current.bomPolicy = IgnoreBOM + } + + var r rune + var dSize, sSize int + for nSrc < len(src) { + if nSrc+1 < len(src) { + x := uint16(src[nSrc+0])<<8 | uint16(src[nSrc+1]) + if u.current.endianness == LittleEndian { + x = x>>8 | x<<8 + } + r, sSize = rune(x), 2 + if utf16.IsSurrogate(r) { + if nSrc+3 < len(src) { + x = uint16(src[nSrc+2])<<8 | uint16(src[nSrc+3]) + if u.current.endianness == LittleEndian { + x = x>>8 | x<<8 + } + // Save for next iteration if it is not a high surrogate. + if isHighSurrogate(rune(x)) { + r, sSize = utf16.DecodeRune(r, rune(x)), 4 + } + } else if !atEOF { + err = transform.ErrShortSrc + break + } + } + if dSize = utf8.RuneLen(r); dSize < 0 { + r, dSize = utf8.RuneError, 3 + } + } else if atEOF { + // Single trailing byte. + r, dSize, sSize = utf8.RuneError, 3, 1 + } else { + err = transform.ErrShortSrc + break + } + if nDst+dSize > len(dst) { + err = transform.ErrShortDst + break + } + nDst += utf8.EncodeRune(dst[nDst:], r) + nSrc += sSize + } + return nDst, nSrc, err +} + +func isHighSurrogate(r rune) bool { + return 0xDC00 <= r && r <= 0xDFFF +} + +type utf16Encoder struct { + endianness Endianness + initialBOMPolicy BOMPolicy + currentBOMPolicy BOMPolicy +} + +func (u *utf16Encoder) Reset() { + u.currentBOMPolicy = u.initialBOMPolicy +} + +func (u *utf16Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + if u.currentBOMPolicy&writeBOM != 0 { + if len(dst) < 2 { + return 0, 0, transform.ErrShortDst + } + dst[0], dst[1] = 0xfe, 0xff + u.currentBOMPolicy = IgnoreBOM + nDst = 2 + } + + r, size := rune(0), 0 + for nSrc < len(src) { + r = rune(src[nSrc]) + + // Decode a 1-byte rune. + if r < utf8.RuneSelf { + size = 1 + + } else { + // Decode a multi-byte rune. + r, size = utf8.DecodeRune(src[nSrc:]) + if size == 1 { + // All valid runes of size 1 (those below utf8.RuneSelf) were + // handled above. We have invalid UTF-8 or we haven't seen the + // full character yet. + if !atEOF && !utf8.FullRune(src[nSrc:]) { + err = transform.ErrShortSrc + break + } + } + } + + if r <= 0xffff { + if nDst+2 > len(dst) { + err = transform.ErrShortDst + break + } + dst[nDst+0] = uint8(r >> 8) + dst[nDst+1] = uint8(r) + nDst += 2 + } else { + if nDst+4 > len(dst) { + err = transform.ErrShortDst + break + } + r1, r2 := utf16.EncodeRune(r) + dst[nDst+0] = uint8(r1 >> 8) + dst[nDst+1] = uint8(r1) + dst[nDst+2] = uint8(r2 >> 8) + dst[nDst+3] = uint8(r2) + nDst += 4 + } + nSrc += size + } + + if u.endianness == LittleEndian { + for i := 0; i < nDst; i += 2 { + dst[i], dst[i+1] = dst[i+1], dst[i] + } + } + return nDst, nSrc, err +} diff --git a/vendor/golang.org/x/text/internal/utf8internal/utf8internal.go b/vendor/golang.org/x/text/internal/utf8internal/utf8internal.go new file mode 100644 index 00000000..575cea87 --- /dev/null +++ b/vendor/golang.org/x/text/internal/utf8internal/utf8internal.go @@ -0,0 +1,87 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package utf8internal contains low-level utf8-related constants, tables, etc. +// that are used internally by the text package. +package utf8internal + +// The default lowest and highest continuation byte. +const ( + LoCB = 0x80 // 1000 0000 + HiCB = 0xBF // 1011 1111 +) + +// Constants related to getting information of first bytes of UTF-8 sequences. +const ( + // ASCII identifies a UTF-8 byte as ASCII. + ASCII = as + + // FirstInvalid indicates a byte is invalid as a first byte of a UTF-8 + // sequence. + FirstInvalid = xx + + // SizeMask is a mask for the size bits. Use use x&SizeMask to get the size. + SizeMask = 7 + + // AcceptShift is the right-shift count for the first byte info byte to get + // the index into the AcceptRanges table. See AcceptRanges. + AcceptShift = 4 + + // The names of these constants are chosen to give nice alignment in the + // table below. The first nibble is an index into acceptRanges or F for + // special one-byte cases. The second nibble is the Rune length or the + // Status for the special one-byte case. + xx = 0xF1 // invalid: size 1 + as = 0xF0 // ASCII: size 1 + s1 = 0x02 // accept 0, size 2 + s2 = 0x13 // accept 1, size 3 + s3 = 0x03 // accept 0, size 3 + s4 = 0x23 // accept 2, size 3 + s5 = 0x34 // accept 3, size 4 + s6 = 0x04 // accept 0, size 4 + s7 = 0x44 // accept 4, size 4 +) + +// First is information about the first byte in a UTF-8 sequence. +var First = [256]uint8{ + // 1 2 3 4 5 6 7 8 9 A B C D E F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x00-0x0F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x10-0x1F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x20-0x2F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x30-0x3F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x40-0x4F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x50-0x5F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x60-0x6F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x70-0x7F + // 1 2 3 4 5 6 7 8 9 A B C D E F + xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x80-0x8F + xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x90-0x9F + xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xA0-0xAF + xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xB0-0xBF + xx, xx, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xC0-0xCF + s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xD0-0xDF + s2, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s4, s3, s3, // 0xE0-0xEF + s5, s6, s6, s6, s7, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xF0-0xFF +} + +// AcceptRange gives the range of valid values for the second byte in a UTF-8 +// sequence for any value for First that is not ASCII or FirstInvalid. +type AcceptRange struct { + Lo uint8 // lowest value for second byte. + Hi uint8 // highest value for second byte. +} + +// AcceptRanges is a slice of AcceptRange values. For a given byte sequence b +// +// AcceptRanges[First[b[0]]>>AcceptShift] +// +// will give the value of AcceptRange for the multi-byte UTF-8 sequence starting +// at b[0]. +var AcceptRanges = [...]AcceptRange{ + 0: {LoCB, HiCB}, + 1: {0xA0, HiCB}, + 2: {LoCB, 0x9F}, + 3: {0x90, HiCB}, + 4: {LoCB, 0x8F}, +} diff --git a/vendor/modules.txt b/vendor/modules.txt index dfc3a0b2..6c4d9bcd 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -281,9 +281,6 @@ github.com/minio/minio-go/v7/pkg/tags # github.com/minio/sha256-simd v1.0.0 ## explicit; go 1.13 github.com/minio/sha256-simd -# github.com/missdeer/golib v1.0.4 -## explicit; go 1.12 -github.com/missdeer/golib/ic # github.com/mitchellh/go-homedir v1.1.0 ## explicit github.com/mitchellh/go-homedir @@ -526,9 +523,11 @@ golang.org/x/text/encoding/japanese golang.org/x/text/encoding/korean golang.org/x/text/encoding/simplifiedchinese golang.org/x/text/encoding/traditionalchinese +golang.org/x/text/encoding/unicode golang.org/x/text/internal/language golang.org/x/text/internal/language/compact golang.org/x/text/internal/tag +golang.org/x/text/internal/utf8internal golang.org/x/text/language golang.org/x/text/runes golang.org/x/text/secure/bidirule