-
Notifications
You must be signed in to change notification settings - Fork 0
/
fullwidth.go
79 lines (71 loc) · 2.92 KB
/
fullwidth.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
package gullwidth
import (
"errors"
"fmt"
"unicode"
)
// latinASCIICharMapping maps every printable, non-space ASCII character
// ('!' through '~') to its fullwidth counterpart.
//
// The fullwidth forms U+FF01..U+FF5E are laid out in the same order as ASCII
// 0x21..0x7E, so each value is the key plus a fixed offset of 0xFEE0. The
// table is generated from that offset rather than spelled out with literal
// fullwidth characters, so it cannot be corrupted by tools that normalise
// fullwidth characters back to ASCII.
//
// Katakana, Hangul, and other special characters are not supported. The space
// character has no entry here: its fullwidth form is the ideographic space
// U+3000, which is handled by ideographicSpaceCharMapping.
var latinASCIICharMapping = func() map[byte]rune {
	const (
		first  = '!'    // lowest mapped ASCII character (0x21)
		last   = '~'    // highest mapped ASCII character (0x7E)
		offset = 0xFEE0 // distance from an ASCII character to its fullwidth form
	)
	m := make(map[byte]rune, last-first+1)
	for c := rune(first); c <= last; c++ {
		m[byte(c)] = c + offset
	}
	return m
}()
// ideographicSpaceCharMapping maps the ASCII space to its fullwidth form.
//
// The fullwidth space is U+3000 ("Ideographic Space"). It is defined
// separately because it is separate from the fullwidth definitions in
// Unicode that latinASCIICharMapping covers. The value is written as an
// escape so the intended code point stays visible and unambiguous in source.
var ideographicSpaceCharMapping = map[byte]rune{
	' ': '\u3000',
}
// ErrNotASCII is the sentinel error reported when the Fullwidth function is
// passed a string containing a non-ASCII character. Fullwidth wraps it with
// additional detail, so callers should match it with errors.Is rather than
// by comparing error text.
var ErrNotASCII = errors.New("string contains non-ascii characters")
// Fullwidth takes an ASCII string and converts it to Unicode fullwidth
// characters.
//
// Printable characters are translated through latinASCIICharMapping and the
// space character through ideographicSpaceCharMapping. ASCII bytes that have
// no entry in either table (control characters such as tab or newline) are
// copied to the output unchanged.
//
// If the provided string contains characters that are not part of the ASCII
// character set, the returned string is empty and the error wraps
// ErrNotASCII, naming the first offending rune.
func Fullwidth(ascii string) (fullwidth string, err error) {
	if c, err := isASCII(ascii); err != nil {
		// isASCII reports a single byte, which for multi-byte UTF-8 input is
		// only a fragment of the character. Decode the string to find the
		// actual rune so the message shows what the caller really passed.
		bad := rune(c)
		for _, r := range ascii {
			if r > unicode.MaxASCII {
				bad = r
				break
			}
		}
		return "", fmt.Errorf("%w: rune is not in the ASCII character set: %c", err, bad)
	}

	// Every input byte produces exactly one output rune, so the final length
	// is known up front and the buffer never needs to grow.
	out := make([]rune, 0, len(ascii))
	for i := 0; i < len(ascii); i++ {
		c := ascii[i]
		// A map miss yields the zero rune, and converting that to a string
		// gives a NUL byte, so only use a table's value on a confirmed hit.
		if r, ok := latinASCIICharMapping[c]; ok {
			out = append(out, r)
		} else if r, ok := ideographicSpaceCharMapping[c]; ok {
			out = append(out, r)
		} else {
			out = append(out, rune(c))
		}
	}
	return string(out), nil
}
// isASCII scans s one byte at a time looking for a value above
// unicode.MaxASCII. The first such byte is returned together with
// ErrNotASCII; when every byte of s is ASCII the result is (0, nil).
func isASCII(s string) (byte, error) {
	for _, b := range []byte(s) {
		if b <= unicode.MaxASCII {
			continue
		}
		return b, ErrNotASCII
	}
	return 0, nil
}