forked from GullyAPCBurns/lapdftext
-
Notifications
You must be signed in to change notification settings - Fork 44
/
general.drl
94 lines (80 loc) · 2.11 KB
/
general.drl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#created on: Jul 30, 2010
package edu.isi.bmkeg.pdf.classification.rules
#list any import classes here.
import edu.isi.bmkeg.lapdf.features.ChunkFeatures;
import edu.isi.bmkeg.lapdf.model.ChunkBlock;
// Global handle to the ChunkBlock that the rules in this file classify.
// Some rules query it inside eval() conditions, and every rule consequence
// calls chunk.setType(...) on it.
// NOTE(review): a global is not a fact in working memory -- presumably the
// caller sets it on the session before firing the rules; confirm against the
// classifier code that loads this file.
global ChunkBlock chunk;
// Rule "Title": sets the chunk type to TYPE_TITLE when one ChunkFeatures fact
// has pageNumber == 1, heightDifferenceBetweenChunkWordAndDocumentWord > 8
// (unit of the threshold is defined by ChunkFeatures -- TODO confirm) and
// alignedMiddle == true.
//
// All three constraints are written inside a single pattern so that they are
// evaluated against the SAME ChunkFeatures fact. Written as three separate
// patterns they would be joined as a cross-product and could be satisfied by
// three different facts if more than one is ever in working memory.
//
// The rule belongs to activation-group "blockClassification", so at most one
// rule of that group fires per evaluation.
// NOTE(review): several rules in this group share salience 4; when more than
// one of them matches, which one fires depends on the engine's conflict
// resolution, not on this file.
rule "Title"
    activation-group "blockClassification"
    salience 4
when
    ChunkFeatures(
        pageNumber == 1,
        heightDifferenceBetweenChunkWordAndDocumentWord > 8,
        alignedMiddle == true
    )
then
    chunk.setType(chunk.TYPE_TITLE);
end
// Rule "body": sets the chunk type to TYPE_BODY when one ChunkFeatures fact
// has mostPopularFontInDocument == true and alignedWithColumnBoundaries == true.
//
// Both constraints are placed in a single pattern so they are tested against
// the same ChunkFeatures fact; as two separate patterns they could be
// satisfied by two different facts.
//
// Member of activation-group "blockClassification" (at most one rule of the
// group fires), salience 4.
rule "body"
    activation-group "blockClassification"
    salience 4
when
    ChunkFeatures(
        mostPopularFontInDocument == true,
        alignedWithColumnBoundaries == true
    )
then
    chunk.setType(chunk.TYPE_BODY);
end
// Rule "body2": sets the chunk type to TYPE_BODY when one ChunkFeatures fact
// has nextMostPopularFontInDocument == true and
// alignedWithColumnBoundaries == true. It differs from rule "body" only in the
// font property it tests.
//
// Both constraints are placed in a single pattern so they are tested against
// the same ChunkFeatures fact; as two separate patterns they could be
// satisfied by two different facts.
//
// Member of activation-group "blockClassification" (at most one rule of the
// group fires), salience 4.
rule "body2"
    activation-group "blockClassification"
    salience 4
when
    ChunkFeatures(
        nextMostPopularFontInDocument == true,
        alignedWithColumnBoundaries == true
    )
then
    chunk.setType(chunk.TYPE_BODY);
end
// Rule "heading": sets the chunk type to TYPE_HEADING when one ChunkFeatures
// fact has withinBodyTextFrame == true and alignedWithColumnBoundaries == true,
// and the global chunk reports readNumberOfLine() <= 3.
//
// The two ChunkFeatures constraints are placed in a single pattern so they are
// tested against the same fact; as two separate patterns they could be
// satisfied by two different facts.
//
// Member of activation-group "blockClassification" (at most one rule of the
// group fires). Its salience is 3, lower than the salience 4 used by the other
// rules visible in this file, so those rules take precedence when they match.
rule "heading"
    activation-group "blockClassification"
    salience 3
when
    ChunkFeatures(
        withinBodyTextFrame == true,
        alignedWithColumnBoundaries == true
    )
    // The line count is read from the global chunk, not from the fact.
    eval(chunk.readNumberOfLine() <= 3)
then
    chunk.setType(chunk.TYPE_HEADING);
end
// Rule "figure-legend": sets the chunk type to TYPE_FIGURE_LEGEND when a
// ChunkFeatures fact has mostPopularFontInDocument == false and the global
// chunk matches the regular expression below (text starting with "Figure" or
// "FIGURE", one space, then at least one digit).
//
// Member of activation-group "blockClassification" (at most one rule of the
// group fires), salience 4.
rule "figure-legend"
    salience 4
    activation-group "blockClassification"
when
    ChunkFeatures( mostPopularFontInDocument == false )
    eval( chunk.isMatchingRegularExpression("^(Figure \\d+|FIGURE \\d+)") )
then
    chunk.setType(chunk.TYPE_FIGURE_LEGEND);
end
// Rule "figure-legend2": sets the chunk type to TYPE_FIGURE_LEGEND when the
// global chunk reports
// isUnderOneLineFlushNeighboursOfType(chunk.TYPE_FIGURE_LEGEND).
// NOTE(review): presumably this catches the body of a legend that sits below a
// one-line chunk already classified as a figure legend -- confirm against
// ChunkBlock.isUnderOneLineFlushNeighboursOfType.
//
// Fix: the consequence previously assigned TYPE_HEADING, which contradicts both
// the rule name and the neighbour type it tests for (it looks like a copy of
// the "heading" consequence). It now assigns TYPE_FIGURE_LEGEND.
//
// Member of activation-group "blockClassification" (at most one rule of the
// group fires), salience 4.
rule "figure-legend2"
    activation-group "blockClassification"
    salience 4
when
    eval(chunk.isUnderOneLineFlushNeighboursOfType(chunk.TYPE_FIGURE_LEGEND) == true)
then
    chunk.setType(chunk.TYPE_FIGURE_LEGEND);
end
// Rule "Header": sets the chunk type to TYPE_HEADER when the global chunk
// reports readNumberOfLine() == 1 and a ChunkFeatures fact has
// containingFirstLineOfPage == true.
//
// Member of activation-group "blockClassification" (at most one rule of the
// group fires), salience 4.
rule "Header"
    salience 4
    activation-group "blockClassification"
when
    eval( chunk.readNumberOfLine() == 1 )
    ChunkFeatures( containingFirstLineOfPage == true )
then
    chunk.setType(chunk.TYPE_HEADER);
end
// Rule "Footer": sets the chunk type to TYPE_FOOTER when the global chunk
// reports readNumberOfLine() == 1 and a ChunkFeatures fact has
// containingLastLineOfPage == true.
//
// Member of activation-group "blockClassification" (at most one rule of the
// group fires), salience 4.
rule "Footer"
    salience 4
    activation-group "blockClassification"
when
    eval( chunk.readNumberOfLine() == 1 )
    ChunkFeatures( containingLastLineOfPage == true )
then
    chunk.setType(chunk.TYPE_FOOTER);
end