-
Notifications
You must be signed in to change notification settings - Fork 0
/
netscaper.go
173 lines (148 loc) · 4.08 KB
/
netscaper.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
package netscaper
import (
"errors"
"html"
"os"
"regexp"
"strconv"
"strings"
"time"
)
// Attribute names recognised on an <A> tag. getAnchorAttributes switches on
// these values to decide which Bookmark field a parsed attribute populates.
const (
href = "HREF" // stored in Bookmark.Href
icon = "ICON" // stored in Bookmark.Icon
iconURI = "ICON_URI" // stored in Bookmark.IconURI
tags = "TAGS" // comma-separated list, stored in Bookmark.Tags
addDate = "ADD_DATE" // integer seconds, stored in Bookmark.AddDate
lastModified = "LAST_MODIFIED" // integer seconds, stored in Bookmark.LastModified
private = "PRIVATE" // "1" sets Bookmark.Private to true
lastVisited = "LAST_VISITED" // integer seconds, stored in Bookmark.LastVisited
)
// Regular expressions used by the parser. They are compiled once, when the
// package is initialised. None of them carries a case-insensitivity flag, so
// they only match upper-case tag and attribute names.
var (
// h3 captures (group 1) the text between an <H3 ...> opening tag and </H3>.
h3 = regexp.MustCompile("<H3.*>(.*)</H3>")
// h3End matches a </DL><p> pair, optionally followed by <HR>, at the very end
// of the input. Parse treats a match as the end of the current folder.
h3End = regexp.MustCompile(`</DL>\s*<p>\s*(<HR>)?$`)
// spaces matches a run of two or more whitespace characters; cleanStr
// replaces each run with a single space.
spaces = regexp.MustCompile(`\s{2,}`)
// anchorAttributeRegex captures NAME="value" pairs: group 1 is the attribute
// name and group 2 the value (matched non-greedily up to the next quote).
// ICON_URI is listed before ICON so the longer name is tried first.
// NOTE(review): the * after the name group makes the name optional, so
// ="..." belonging to an unlisted attribute also matches with an empty
// name — confirm this is intended.
anchorAttributeRegex = regexp.MustCompile(`(HREF|ADD_DATE|LAST_MODIFIED|ICON_URI|ICON|TAGS|PRIVATE|LAST_VISITED)*="(.*?)"`)
// anchorTitle captures (group 1) the text between an <A ...> opening tag and
// </A>.
anchorTitle = regexp.MustCompile(`<A.*>(.*)</A>`)
)
// Bookmark is a single link extracted from a Netscape bookmark file.
type Bookmark struct {
// Href is the value of the HREF attribute.
Href string
// Title is the anchor text, with HTML entities unescaped.
Title string
// Description is the text following a <DD> tag. It is only filled in when
// Options.ParseDescription is set.
Description string
// Icon is the value of the ICON attribute.
Icon string
// IconURI is the value of the ICON_URI attribute.
IconURI string
// Tags is the TAGS attribute split on commas.
Tags []string
// FolderPath is the names of the enclosing folders, outermost first,
// joined with Options.FolderPathSeparator.
FolderPath string
// AddDate is the ADD_DATE attribute interpreted as Unix seconds.
AddDate time.Time
// LastModified is the LAST_MODIFIED attribute interpreted as Unix seconds.
LastModified time.Time
// LastVisited is the LAST_VISITED attribute interpreted as Unix seconds.
LastVisited time.Time
// Private is true when the PRIVATE attribute equals "1".
Private bool
}
// Options controls how Parse interprets a bookmark file. When Parse is given
// nil options it uses FolderPathSeparator "␝", ParseDescription false and
// IgnoreBookmarklets true; a non-nil Options is used exactly as supplied.
type Options struct {
// The string used for separating folders in the folder path.
// Parse uses ␝ when it is called with nil options.
FolderPathSeparator string
// Whether to parse the description or not.
// This refers to the text following a <DD> tag.
ParseDescription bool
// Whether to ignore bookmarklets or not.
// This refers to URLs starting with javascript:
IgnoreBookmarklets bool
}
// Parse extracts the bookmarks from the contents of a Netscape bookmark file.
//
// str must begin with "<!DOCTYPE NETSCAPE-Bookmark-file-1>"; otherwise an
// error is returned. When opts is nil the folder separator is "␝",
// descriptions are not parsed and bookmarklets are ignored.
//
// The input is split on <DT>. Each piece is either a folder heading (<H3>),
// which pushes a name onto the current folder path, or a link (<A>), which
// becomes a Bookmark. Every </DL><p> closer trailing a piece pops one folder
// level. Folder names are HTML-unescaped, matching what is done for bookmark
// titles.
//
// A folder heading that cannot be matched, or a link with no closing </A>,
// yields an error rather than a panic.
func Parse(str string, opts *Options) ([]Bookmark, error) {
	if opts == nil {
		opts = &Options{
			FolderPathSeparator: "␝",
			ParseDescription:    false,
			IgnoreBookmarklets:  true,
		}
	}

	if !strings.HasPrefix(str, "<!DOCTYPE NETSCAPE-Bookmark-file-1>") {
		return nil, errors.New("not a valid file")
	}

	folderPath := make([]string, 0)
	bookmarks := make([]Bookmark, 0)

	for _, entity := range strings.Split(str, "<DT>") {
		entity = cleanStr(entity)

		// Peel every trailing </DL><p> closer off the entity. Several folders
		// can end back to back, and each one must pop a level. What is left
		// in body is the entity without that trailing markup.
		body, closed := entity, 0
		for {
			loc := h3End.FindStringIndex(body)
			if loc == nil {
				break
			}
			body = body[:loc[0]]
			closed++
		}

		switch {
		case hasPrefixFold(entity, "<H3"):
			// An H3 tag means the start of a folder.
			m := h3.FindStringSubmatch(entity)
			if m == nil {
				return nil, errors.New("malformed folder heading")
			}
			folderPath = append(folderPath, html.UnescapeString(strings.TrimSpace(m[1])))

		case hasPrefixFold(entity, "<A"):
			// A single bookmark is a link; keep everything up to the last </A>.
			end := strings.LastIndex(entity, "</A>")
			if end < 0 {
				return nil, errors.New("malformed bookmark anchor")
			}
			bm := getAnchorAttributes(entity[:end+len("</A>")])

			// Ignore or keep bookmarklets according to options. A skipped
			// bookmarklet must still fall through to the folder bookkeeping
			// below, otherwise a folder it terminates would never be closed.
			if !(opts.IgnoreBookmarklets && strings.HasPrefix(bm.Href, "javascript:")) {
				// If there's a description it starts with a <DD> tag. Use body
				// so trailing folder closers do not leak into the text.
				if opts.ParseDescription {
					parts := strings.Split(body, "<DD>")
					if len(parts) == 2 {
						bm.Description = cleanStr(parts[1])
					}
				}
				bm.FolderPath = strings.Join(folderPath, opts.FolderPathSeparator)
				bookmarks = append(bookmarks, bm)
			}
		}

		// Leave as many folders as were closed, never going above the root.
		if closed > len(folderPath) {
			closed = len(folderPath)
		}
		folderPath = folderPath[:len(folderPath)-closed]
	}

	return bookmarks, nil
}

// hasPrefixFold reports whether s begins with prefix, ignoring letter case.
// It avoids upper-casing the whole of s just to inspect its first few bytes.
func hasPrefixFold(s, prefix string) bool {
	return len(s) >= len(prefix) && strings.EqualFold(s[:len(prefix)], prefix)
}
// ParseFromFile reads the file at path and parses its contents with Parse,
// passing opts through unchanged.
//
// If the file cannot be read, the error from os.ReadFile is returned so the
// caller can tell a read failure apart from a file that contains no bookmarks.
func ParseFromFile(path string, opts *Options) ([]Bookmark, error) {
	contents, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}

	return Parse(string(contents), opts)
}
// getAnchorAttributes builds a Bookmark from the text of a single <A ...>…</A>
// element.
//
// The title is the HTML-unescaped anchor text; it is left empty when the
// element does not match the title pattern. Each recognised NAME="value"
// attribute fills the corresponding Bookmark field. Tags is always non-nil,
// and an empty TAGS value yields no tags. Timestamp attributes that are not
// valid integers leave the field at the zero time.Time.
func getAnchorAttributes(anchorStr string) Bookmark {
	bm := Bookmark{Tags: make([]string, 0)}

	// FindStringSubmatch returns nil when nothing matches; indexing it
	// unconditionally would panic on a malformed anchor.
	if m := anchorTitle.FindStringSubmatch(anchorStr); m != nil {
		bm.Title = html.UnescapeString(m[1])
	}

	for _, m := range anchorAttributeRegex.FindAllStringSubmatch(anchorStr, -1) {
		key, value := m[1], m[2]

		switch key {
		case href:
			bm.Href = value
		case icon:
			bm.Icon = value
		case iconURI:
			bm.IconURI = value
		case tags:
			// Splitting "" would produce a single empty tag.
			if value != "" {
				bm.Tags = strings.Split(value, ",")
			}
		case addDate:
			bm.AddDate = parseUnixSeconds(value)
		case lastModified:
			bm.LastModified = parseUnixSeconds(value)
		case lastVisited:
			bm.LastVisited = parseUnixSeconds(value)
		case private:
			bm.Private = value == "1"
		}
	}

	return bm
}

// parseUnixSeconds converts a decimal count of seconds since the Unix epoch
// into a time.Time. It returns the zero time.Time when value is not a valid
// 64-bit integer, so callers can detect a missing timestamp with IsZero.
func parseUnixSeconds(value string) time.Time {
	secs, err := strconv.ParseInt(value, 10, 64)
	if err != nil {
		return time.Time{}
	}
	return time.Unix(secs, 0)
}
// cleanStr normalises a fragment of the bookmark file: it removes every
// newline character, replaces each run of two or more whitespace characters
// with a single space, and trims leading and trailing whitespace.
func cleanStr(str string) string {
	withoutNewlines := strings.ReplaceAll(str, "\n", "")
	collapsed := spaces.ReplaceAllString(withoutNewlines, " ")
	return strings.TrimSpace(collapsed)
}