mirror of
https://github.com/cwinfo/matterbridge.git
synced 2024-12-27 12:25:40 +00:00
463 lines
9.3 KiB
Go
463 lines
9.3 KiB
Go
// Copyright 2015 The Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package linkify provides a way to find links in plain text.
package linkify
|
||
|
|
||
|
import (
|
||
|
"strings"
|
||
|
"unicode/utf8"
|
||
|
)
|
||
|
|
||
|
// Link represents a link found in a string: its scheme and its position
// within that string.
type Link struct {
	// Scheme is the scheme recorded for the link. Links sets it to one of
	// "http:", "https:", "ftp:", "mailto:", "//" (scheme-less link), or ""
	// when the text carries no scheme at all.
	Scheme string

	// Start and End are byte offsets into the scanned string; the link text
	// is s[Start:End].
	Start, End int
}
|
||
|
|
||
|
// max returns the larger of a and b.
//
// This package-level definition shadows the predeclared max that exists
// since Go 1.21; it is kept so the package also builds with older toolchains.
func max(a, b int) int {
	if a >= b {
		return a
	}
	return b
}
|
||
|
|
||
|
// Links returns links found in s.
|
||
|
func Links(s string) (links []Link) {
|
||
|
for i := 0; i < len(s)-2; i++ {
|
||
|
switch s[i] {
|
||
|
case '.': // IP address or domain name
|
||
|
if i == 0 {
|
||
|
continue // . at the start of a line
|
||
|
}
|
||
|
if length := match(s[i+1:]); length > 0 {
|
||
|
pos := i + 1 + length
|
||
|
switch s[pos-1] {
|
||
|
case '.': // IP address
|
||
|
if pos >= len(s) {
|
||
|
continue // . at the end of line
|
||
|
}
|
||
|
if !digit(s[i-1]) {
|
||
|
i = pos
|
||
|
continue // . should be preceded by a digit
|
||
|
}
|
||
|
if !digit(s[pos]) {
|
||
|
i = pos
|
||
|
continue // . should be followed by a digit
|
||
|
}
|
||
|
|
||
|
// find the start of the IP address
|
||
|
j := i - 2
|
||
|
m := max(0, j-3)
|
||
|
for j >= m && digit(s[j]) {
|
||
|
j--
|
||
|
}
|
||
|
if i-2-j > 2 {
|
||
|
i = pos + 1
|
||
|
continue // at most 3 digits
|
||
|
}
|
||
|
start := 0
|
||
|
if j >= 0 {
|
||
|
r, rlen := utf8.DecodeLastRuneInString(s[:j+1])
|
||
|
if !isPunctOrSpaceOrControl(r) {
|
||
|
i = pos + 1
|
||
|
continue
|
||
|
}
|
||
|
switch r {
|
||
|
case '.', ':', '/', '\\', '-', '_':
|
||
|
i = pos + 1
|
||
|
continue
|
||
|
}
|
||
|
start = j + 2 - rlen
|
||
|
}
|
||
|
|
||
|
length, ok := skipIPv4(s[start:])
|
||
|
if !ok {
|
||
|
i = pos + 1
|
||
|
continue
|
||
|
}
|
||
|
end := start + length
|
||
|
if end == len(s) {
|
||
|
links = append(links, Link{
|
||
|
Scheme: "",
|
||
|
Start: start,
|
||
|
End: end,
|
||
|
})
|
||
|
return
|
||
|
}
|
||
|
|
||
|
r, _ := utf8.DecodeRuneInString(s[end:])
|
||
|
if !isPunctOrSpaceOrControl(r) {
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
end = skipPort(s, end)
|
||
|
end = skipPath(s, end)
|
||
|
end = skipQuery(s, end)
|
||
|
end = skipFragment(s, end)
|
||
|
end = unskipPunct(s, end)
|
||
|
|
||
|
if end < len(s) {
|
||
|
r, _ = utf8.DecodeRuneInString(s[end:])
|
||
|
if !isPunctOrSpaceOrControl(r) || r == '%' {
|
||
|
continue
|
||
|
}
|
||
|
}
|
||
|
|
||
|
links = append(links, Link{
|
||
|
Scheme: "",
|
||
|
Start: start,
|
||
|
End: end,
|
||
|
})
|
||
|
i = end
|
||
|
|
||
|
default: // domain name
|
||
|
r, _ := utf8.DecodeLastRuneInString(s[:i])
|
||
|
if isPunctOrSpaceOrControl(r) {
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
if pos == len(s) {
|
||
|
start, ok := findHostnameStart(s, i)
|
||
|
if !ok {
|
||
|
continue
|
||
|
}
|
||
|
links = append(links, Link{
|
||
|
Scheme: "",
|
||
|
Start: start,
|
||
|
End: pos,
|
||
|
})
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if s[i+1:pos] != "xn--" {
|
||
|
r, _ = utf8.DecodeRuneInString(s[pos:])
|
||
|
if isLetterOrDigit(r) {
|
||
|
continue // should not be followed by a letter or a digit
|
||
|
}
|
||
|
}
|
||
|
|
||
|
end, dot, ok := findHostnameEnd(s, pos)
|
||
|
if !ok {
|
||
|
continue
|
||
|
}
|
||
|
dot = max(dot, i)
|
||
|
|
||
|
if !(dot+5 <= len(s) && s[dot+1:dot+5] == "xn--") {
|
||
|
if length := match(s[dot+1:]); dot+length+1 != end {
|
||
|
continue
|
||
|
}
|
||
|
}
|
||
|
|
||
|
start, ok := findHostnameStart(s, i)
|
||
|
if !ok {
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
end = skipPort(s, end)
|
||
|
end = skipPath(s, end)
|
||
|
end = skipQuery(s, end)
|
||
|
end = skipFragment(s, end)
|
||
|
end = unskipPunct(s, end)
|
||
|
|
||
|
if end < len(s) {
|
||
|
r, _ = utf8.DecodeRuneInString(s[end:])
|
||
|
if !isPunctOrSpaceOrControl(r) || r == '%' {
|
||
|
continue // should be followed by punctuation or space
|
||
|
}
|
||
|
}
|
||
|
|
||
|
links = append(links, Link{
|
||
|
Scheme: "",
|
||
|
Start: start,
|
||
|
End: end,
|
||
|
})
|
||
|
i = end
|
||
|
}
|
||
|
}
|
||
|
|
||
|
case '/': // schema-less link
|
||
|
if s[i+1] != '/' {
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
if i > 0 {
|
||
|
if s[i-1] == ':' {
|
||
|
i++
|
||
|
continue // should not be preceded by a colon
|
||
|
}
|
||
|
r, _ := utf8.DecodeLastRuneInString(s[:i])
|
||
|
if !isPunctOrSpaceOrControl(r) {
|
||
|
i++
|
||
|
continue // should be preceded by punctuation or space
|
||
|
}
|
||
|
}
|
||
|
|
||
|
r, _ := utf8.DecodeRuneInString(s[i+2:])
|
||
|
if !isLetterOrDigit(r) {
|
||
|
i++
|
||
|
continue // should be followed by a letter or a digit
|
||
|
}
|
||
|
|
||
|
start := i
|
||
|
end, dot, ok := findHostnameEnd(s, i+2)
|
||
|
if !ok {
|
||
|
continue
|
||
|
}
|
||
|
if s[i+2:end] != "localhost" {
|
||
|
if dot == -1 {
|
||
|
continue // no dot
|
||
|
}
|
||
|
if length, ok := skipIPv4(s[i+2:]); !ok || i+2+length != end {
|
||
|
if length := match(s[dot+1:]); dot+length+1 != end {
|
||
|
continue
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
end = skipPort(s, end)
|
||
|
end = skipPath(s, end)
|
||
|
end = skipQuery(s, end)
|
||
|
end = skipFragment(s, end)
|
||
|
end = unskipPunct(s, end)
|
||
|
|
||
|
if end < len(s) {
|
||
|
r, _ = utf8.DecodeRuneInString(s[end:])
|
||
|
if !isPunctOrSpaceOrControl(r) || r == '%' {
|
||
|
continue // should be followed by punctuation or space
|
||
|
}
|
||
|
}
|
||
|
|
||
|
links = append(links, Link{
|
||
|
Scheme: "//",
|
||
|
Start: start,
|
||
|
End: end,
|
||
|
})
|
||
|
i = end
|
||
|
|
||
|
case ':': // http, https, ftp, mailto or localhost
|
||
|
if i < 3 { // at least ftp:
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
if i >= 9 && s[i-1] == 't' && s[i-9:i] == "localhost" {
|
||
|
j := i - 9
|
||
|
if !digit(s[j+10]) {
|
||
|
continue
|
||
|
}
|
||
|
if j > 0 {
|
||
|
r, _ := utf8.DecodeLastRuneInString(s[:j])
|
||
|
if !isPunctOrSpaceOrControl(r) {
|
||
|
i++
|
||
|
continue // should be preceded by punctuation or space
|
||
|
}
|
||
|
}
|
||
|
|
||
|
start := j
|
||
|
pos := j + 9
|
||
|
end := skipPort(s, pos)
|
||
|
if end == pos {
|
||
|
continue // invalid port
|
||
|
}
|
||
|
end = skipPath(s, end)
|
||
|
end = skipQuery(s, end)
|
||
|
end = skipFragment(s, end)
|
||
|
end = unskipPunct(s, end)
|
||
|
|
||
|
if end < len(s) {
|
||
|
r, _ := utf8.DecodeRuneInString(s[end:])
|
||
|
if !isPunctOrSpaceOrControl(r) || r == '%' {
|
||
|
i++
|
||
|
continue // should be followed by punctuation or space
|
||
|
}
|
||
|
}
|
||
|
|
||
|
links = append(links, Link{
|
||
|
Scheme: "",
|
||
|
Start: start,
|
||
|
End: end,
|
||
|
})
|
||
|
i = end
|
||
|
|
||
|
break
|
||
|
}
|
||
|
|
||
|
j := i - 1
|
||
|
var start int
|
||
|
var schema string
|
||
|
|
||
|
switch byteToLower(s[j]) {
|
||
|
case 'o': // mailto
|
||
|
if j < 5 {
|
||
|
continue // too short for mailto
|
||
|
}
|
||
|
if len(s)-j < 8 {
|
||
|
continue // insufficient length after
|
||
|
}
|
||
|
if strings.ToLower(s[j-5:j+2]) != "mailto:" {
|
||
|
continue
|
||
|
}
|
||
|
r, _ := utf8.DecodeLastRuneInString(s[:j-5])
|
||
|
if isLetterOrDigit(r) {
|
||
|
continue // should not be preceded by a letter or a digit
|
||
|
}
|
||
|
r, _ = utf8.DecodeRuneInString(s[j+2:])
|
||
|
if !isAllowedInEmail(r) {
|
||
|
continue // should be followed by a valid e-mail character
|
||
|
}
|
||
|
|
||
|
start = j - 5
|
||
|
end, ok := findEmailEnd(s, j+2)
|
||
|
if !ok {
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
links = append(links, Link{
|
||
|
Scheme: "mailto:",
|
||
|
Start: start,
|
||
|
End: end,
|
||
|
})
|
||
|
i = end
|
||
|
continue // continue processing
|
||
|
|
||
|
case 'p': // http or ftp
|
||
|
if len(s)-j < 8 {
|
||
|
continue // insufficient length after
|
||
|
}
|
||
|
switch byteToLower(s[j-2]) {
|
||
|
case 'f':
|
||
|
if strings.ToLower(s[j-2:j+4]) != "ftp://" {
|
||
|
continue
|
||
|
}
|
||
|
start = j - 2
|
||
|
schema = "ftp:"
|
||
|
case 't':
|
||
|
if j < 3 {
|
||
|
continue
|
||
|
}
|
||
|
if strings.ToLower(s[j-3:j+4]) != "http://" {
|
||
|
continue
|
||
|
}
|
||
|
start = j - 3
|
||
|
schema = "http:"
|
||
|
default:
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
case 's': // https
|
||
|
if j < 4 {
|
||
|
continue // too short for https
|
||
|
}
|
||
|
if len(s)-j < 8 {
|
||
|
continue // insufficient length after
|
||
|
}
|
||
|
start = j - 4
|
||
|
if strings.ToLower(s[start:j+4]) != "https://" {
|
||
|
continue
|
||
|
}
|
||
|
schema = "https:"
|
||
|
|
||
|
default:
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
// http, https or ftp
|
||
|
|
||
|
if start > 0 {
|
||
|
r, _ := utf8.DecodeLastRuneInString(s[:start])
|
||
|
if !isPunctOrSpaceOrControl(r) {
|
||
|
continue // should be preceded by punctuation or space
|
||
|
}
|
||
|
}
|
||
|
|
||
|
r, _ := utf8.DecodeRuneInString(s[j+4:])
|
||
|
if isPunctOrSpaceOrControl(r) {
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
end, dot, ok := findHostnameEnd(s, j+4)
|
||
|
if !ok {
|
||
|
continue
|
||
|
}
|
||
|
if s[j+4:end] != "localhost" {
|
||
|
if dot == -1 {
|
||
|
continue // no dot
|
||
|
}
|
||
|
if length, ok := skipIPv4(s[j+4:]); !ok || j+4+length != end {
|
||
|
if !(dot+5 <= len(s) && s[dot+1:dot+5] == "xn--") {
|
||
|
if length := match(s[dot+1:]); dot+length+1 != end {
|
||
|
continue
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
end = skipPort(s, end)
|
||
|
end = skipPath(s, end)
|
||
|
end = skipQuery(s, end)
|
||
|
end = skipFragment(s, end)
|
||
|
end = unskipPunct(s, end)
|
||
|
|
||
|
if end < len(s) {
|
||
|
r, _ = utf8.DecodeRuneInString(s[end:])
|
||
|
if !isPunctOrSpaceOrControl(r) || r == '%' {
|
||
|
continue // should be followed by punctuation or space
|
||
|
}
|
||
|
}
|
||
|
|
||
|
links = append(links, Link{
|
||
|
Scheme: schema,
|
||
|
Start: start,
|
||
|
End: end,
|
||
|
})
|
||
|
i = end
|
||
|
|
||
|
case '@': // schema-less e-mail
|
||
|
if i == 0 {
|
||
|
continue // @ at the start of a line
|
||
|
}
|
||
|
|
||
|
if len(s)-i < 5 {
|
||
|
continue // insufficient length after
|
||
|
}
|
||
|
|
||
|
r, _ := utf8.DecodeLastRuneInString(s[:i])
|
||
|
if !isAllowedInEmail(r) {
|
||
|
continue // should be preceded by a valid e-mail character
|
||
|
}
|
||
|
|
||
|
r, _ = utf8.DecodeRuneInString(s[i+1:])
|
||
|
if !isLetterOrDigit(r) {
|
||
|
continue // should be followed by a letter or a digit
|
||
|
}
|
||
|
|
||
|
start, ok := findEmailStart(s, i-1)
|
||
|
if !ok {
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
end, dot, ok := findHostnameEnd(s, i+1)
|
||
|
if !ok {
|
||
|
continue
|
||
|
}
|
||
|
if dot == -1 {
|
||
|
continue // no dot
|
||
|
}
|
||
|
if !(dot+5 <= len(s) && s[dot+1:dot+5] == "xn--") {
|
||
|
if length := match(s[dot+1:]); dot+length+1 != end {
|
||
|
continue
|
||
|
}
|
||
|
}
|
||
|
|
||
|
links = append(links, Link{
|
||
|
Scheme: "mailto:",
|
||
|
Start: start,
|
||
|
End: end,
|
||
|
})
|
||
|
i = end
|
||
|
}
|
||
|
}
|
||
|
return
|
||
|
}
|