Skip to content

Commit

Permalink
Add support for multiple transforms of the same CSS selector.
Browse files Browse the repository at this point in the history
With this commit, multiple transforms can be supplied for the same
CSS selector. This allows elements matched by the same selector to be
treated differently depending on their text.

E.g.,

<h2>Chapter X</h2>
<h2>Section Y</h2>

with

"h2": [
  {
    "requiretext": "^Chapter",
    "type": "Module"
  },
  {
    "requiretext": "^Section",
    "type": "Section"
  }
]

Will treat "Chapter X" as a Module and "Section Y" as a Section.
  • Loading branch information
antiagainst committed Jan 14, 2017
1 parent cdaa4aa commit 61db416
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 72 deletions.
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,24 @@ The format of the selector value is:
}
```

You can also specify multiple transformations for the same CSS selector:

```json
"css selector": [
{
"requiretext": "...",
"type": "..."
},
{
"requiretext": "...",
"type": "..."
}
]
```

The above lets you fine-tune how nodes matched by a CSS selector are
handled, based on their text contents.

Full documentation on the regular expression format can be found here:
http://golang.org/pkg/regexp/syntax/

Expand Down
162 changes: 90 additions & 72 deletions dashing.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ type Dashing struct {
// Selectors to match.
Selectors map[string]interface{} `json:"selectors"`
// Final form of the Selectors field.
selectors map[string]*Transform `json:"-"`
selectors map[string][]*Transform `json:"-"`
// Entries that should be ignored.
Ignore []string `json:"ignore"`
// A 32x32 pixel PNG image.
Expand Down Expand Up @@ -216,61 +216,77 @@ func build(c *cli.Context) {
texasRanger(source, source_depth, name, dashing, db)
}

// decodeSingleTransform builds a Transform from one decoded selector object.
//
// The recognized keys are "attr", "type", "regexp", "replacement",
// "requiretext" and "matchpath". Every key is optional; a missing key leaves
// the corresponding Transform field at its zero value. The values of "regexp",
// "requiretext" and "matchpath" are compiled with regexp.Compile.
//
// An error is returned, instead of panicking, when a recognized key holds a
// value that is not a string, and when one of the patterns fails to compile.
// Keys are checked in the order listed above, so the first offending key is
// the one reported.
func decodeSingleTransform(val map[string]interface{}) (*Transform, error) {
	var ttype, trep, attr string
	var creg, cmatchpath, requireText *regexp.Regexp
	var err error

	if attr, err = transformStringField(val, "attr"); err != nil {
		return nil, err
	}
	if ttype, err = transformStringField(val, "type"); err != nil {
		return nil, err
	}
	if creg, err = transformRegexpField(val, "regexp"); err != nil {
		return nil, err
	}
	if trep, err = transformStringField(val, "replacement"); err != nil {
		return nil, err
	}
	if requireText, err = transformRegexpField(val, "requiretext"); err != nil {
		return nil, err
	}
	if cmatchpath, err = transformRegexpField(val, "matchpath"); err != nil {
		return nil, err
	}

	return &Transform{
		Type:        ttype,
		Attribute:   attr,
		Regexp:      creg,
		Replacement: trep,
		RequireText: requireText,
		MatchPath:   cmatchpath,
	}, nil
}

// transformStringField returns the string stored under key in val. It returns
// "" when the key is absent and an error when the value is not a string.
func transformStringField(val map[string]interface{}, key string) (string, error) {
	r, ok := val[key]
	if !ok {
		return "", nil
	}
	s, ok := r.(string)
	if !ok {
		// A plain r.(string) would panic here; report the bad value instead.
		return "", fmt.Errorf("expected '%s' to be a string, got %T", key, r)
	}
	return s, nil
}

// transformRegexpField compiles the pattern stored under key in val. It
// returns a nil regexp when the key is absent; a present but empty pattern is
// still compiled, so presence is checked separately from the string value.
func transformRegexpField(val map[string]interface{}, key string) (*regexp.Regexp, error) {
	if _, ok := val[key]; !ok {
		return nil, nil
	}
	pattern, err := transformStringField(val, key)
	if err != nil {
		return nil, err
	}
	re, err := regexp.Compile(pattern)
	if err != nil {
		return nil, fmt.Errorf("failed to compile regexp '%s': %s", pattern, err)
	}
	return re, nil
}

func decodeSelectField(d *Dashing) error {
d.selectors = make(map[string]*Transform, len(d.Selectors))
d.selectors = make(map[string][]*Transform, len(d.Selectors))
for sel, val := range d.Selectors {
var trans *Transform
var err error
rv := reflect.Indirect(reflect.ValueOf(val))
if rv.Kind() == reflect.String {
trans = &Transform{
Type: val.(string),
}
d.selectors[sel] = append(d.selectors[sel], trans)
} else if rv.Kind() == reflect.Map {
val := val.(map[string]interface{})
var ttype, trep, attr string
var creg, cmatchpath, requireText *regexp.Regexp
var err error

if r, ok := val["attr"]; ok {
attr = r.(string)
if trans, err = decodeSingleTransform(val); err != nil {
return err
}

if r, ok := val["type"]; ok {
ttype = r.(string)
}
if r, ok := val["regexp"]; ok {
creg, err = regexp.Compile(r.(string))
if err != nil {
return fmt.Errorf("failed to compile regexp '%s': %s", r.(string), err)
d.selectors[sel] = append(d.selectors[sel], trans)
} else if rv.Kind() == reflect.Slice {
for i := 0; i < rv.Len(); i++ {
element := rv.Index(i).Interface().(map[string]interface{})
if trans, err = decodeSingleTransform(element); err != nil {
return err
}
}
if r, ok := val["replacement"]; ok {
trep = r.(string)
}
if r, ok := val["requiretext"]; ok {
requireText, err = regexp.Compile(r.(string))
if err != nil {
return fmt.Errorf("failed to compile regexp '%s': %s", r.(string), err)
}
}
if r, ok := val["matchpath"]; ok {
cmatchpath, err = regexp.Compile(r.(string))
if err != nil {
return fmt.Errorf("failed to compile regexp '%s': %s", r.(string), err)
}
}
trans = &Transform{
Type: ttype,
Attribute: attr,
Regexp: creg,
Replacement: trep,
RequireText: requireText,
MatchPath: cmatchpath,
d.selectors[sel] = append(d.selectors[sel], trans)
}
} else {
fmt.Errorf("Expected string or map. Kind is %s.", rv.Kind().String())
return fmt.Errorf("Expected string or map. Kind is %s.", rv.Kind().String())
}
d.selectors[sel] = trans
}
return nil
}
Expand Down Expand Up @@ -461,42 +477,44 @@ func parseHTML(path string, source_depth int, dest string, dashing Dashing) ([]*
}
}

for pattern, sel := range dashing.selectors {
// Skip this selector if file path doesn't match
if sel.MatchPath != nil && !sel.MatchPath.MatchString(path) {
continue
}

m := css.MustCompile(pattern)
found := m.MatchAll(top)
for _, n := range found {
textString := text(n)
if sel.RequireText != nil && !sel.RequireText.MatchString(textString) {
fmt.Printf("Skipping entry for '%s' (Text not matching given regexp '%v')\n", textString, sel.RequireText)
for pattern, sels := range dashing.selectors {
for _, sel := range sels {
// Skip this selector if file path doesn't match
if sel.MatchPath != nil && !sel.MatchPath.MatchString(path) {
continue
}
var name string
if len(sel.Attribute) != 0 {
name = attr(n, sel.Attribute)
} else {
name = textString
}

// Skip things explicitly ignored.
if ignored(name) {
fmt.Printf("Skipping entry for %s (Ignored by dashing JSON)\n", name)
continue
}
m := css.MustCompile(pattern)
found := m.MatchAll(top)
for _, n := range found {
textString := text(n)
if sel.RequireText != nil && !sel.RequireText.MatchString(textString) {
fmt.Printf("Skipping entry for '%s' (Text not matching given regexp '%v')\n", textString, sel.RequireText)
continue
}
var name string
if len(sel.Attribute) != 0 {
name = attr(n, sel.Attribute)
} else {
name = textString
}

// If we have a regexp, run it.
if sel.Regexp != nil {
name = sel.Regexp.ReplaceAllString(name, sel.Replacement)
}
// Skip things explicitly ignored.
if ignored(name) {
fmt.Printf("Skipping entry for %s (Ignored by dashing JSON)\n", name)
continue
}

// If we have a regexp, run it.
if sel.Regexp != nil {
name = sel.Regexp.ReplaceAllString(name, sel.Replacement)
}

// References we want to track.
refs = append(refs, &reference{name, sel.Type, path + "#" + anchor(n)})
// We need to modify the DOM with a special link to support TOC.
n.Parent.InsertBefore(newA(name, sel.Type), n)
// References we want to track.
refs = append(refs, &reference{name, sel.Type, path + "#" + anchor(n)})
// We need to modify the DOM with a special link to support TOC.
n.Parent.InsertBefore(newA(name, sel.Type), n)
}
}
}
return refs, writeHTML(path, dest, top)
Expand Down

0 comments on commit 61db416

Please sign in to comment.