Skip to content

Commit

Permalink
Merge pull request #960 from pengli09/chunk_evaluator
Browse files Browse the repository at this point in the history
Add excluded_chunk_types to ChunkEvaluator
  • Loading branch information
pengli09 authored Dec 20, 2016
2 parents 8a42a54 + 6e405a1 commit 2965df5
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 22 deletions.
17 changes: 14 additions & 3 deletions paddle/gserver/evaluators/ChunkEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <set>
#include <vector>

#include "paddle/math/Vector.h"
Expand Down Expand Up @@ -72,6 +73,7 @@ class ChunkEvaluator : public Evaluator {

std::vector<Segment> labelSegments_;
std::vector<Segment> outputSegments_;
std::set<int> excludedChunkTypes_;

public:
virtual void init(const EvaluatorConfig& config) {
Expand Down Expand Up @@ -105,6 +107,10 @@ class ChunkEvaluator : public Evaluator {
}
CHECK(config.has_num_chunk_types()) << "Missing num_chunk_types in config";
otherChunkType_ = numChunkTypes_ = config.num_chunk_types();

// chunks whose type is in excludedChunkTypes_ are not counted
auto& tmp = config.excluded_chunk_types();
excludedChunkTypes_.insert(tmp.begin(), tmp.end());
}

virtual void start() {
Expand Down Expand Up @@ -156,7 +162,8 @@ class ChunkEvaluator : public Evaluator {
getSegments(label, length, labelSegments_);
size_t i = 0, j = 0;
while (i < outputSegments_.size() && j < labelSegments_.size()) {
if (outputSegments_[i] == labelSegments_[j]) {
if (outputSegments_[i] == labelSegments_[j] &&
excludedChunkTypes_.count(outputSegments_[i].type) != 1) {
++numCorrect_;
}
if (outputSegments_[i].end < labelSegments_[j].end) {
Expand All @@ -168,8 +175,12 @@ class ChunkEvaluator : public Evaluator {
++j;
}
}
numLabelSegments_ += labelSegments_.size();
numOutputSegments_ += outputSegments_.size();
for (auto& segment : labelSegments_) {
if (excludedChunkTypes_.count(segment.type) != 1) ++numLabelSegments_;
}
for (auto& segment : outputSegments_) {
if (excludedChunkTypes_.count(segment.type) != 1) ++numOutputSegments_;
}
}

void getSegments(int* label, int length, std::vector<Segment>& segments) {
Expand Down
10 changes: 8 additions & 2 deletions proto/ModelConfig.proto
Original file line number Diff line number Diff line change
Expand Up @@ -433,8 +433,10 @@ message EvaluatorConfig {
repeated string input_layers = 3;

// Used by ChunkEvaluator
optional string chunk_scheme = 4; // one of "IOB", "IOE", "IOBES"
optional int32 num_chunk_types = 5; // number of chunk types other than "other"
// one of "IOB", "IOE", "IOBES"
optional string chunk_scheme = 4;
// number of chunk types other than "other"
optional int32 num_chunk_types = 5;

// Used by PrecisionRecallEvaluator and ClassificationErrorEvaluator
// For multi binary labels: true if output > classification_threshold
Expand All @@ -453,6 +455,10 @@ message EvaluatorConfig {

// whether to delimit the sequence in the seq_text_printer
optional bool delimited = 11 [default = true];

// Used by ChunkEvaluator
// chunks of these types are not counted
repeated int32 excluded_chunk_types = 12;
}

message LinkConfig {
Expand Down
6 changes: 5 additions & 1 deletion python/paddle/trainer/config_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1240,7 +1240,8 @@ def Evaluator(
dict_file=None,
result_file=None,
num_results=None,
delimited=None, ):
delimited=None,
excluded_chunk_types=None, ):
evaluator = g_config.model_config.evaluators.add()
evaluator.type = type
evaluator.name = MakeLayerNameInSubmodel(name)
Expand Down Expand Up @@ -1269,6 +1270,9 @@ def Evaluator(
if delimited is not None:
evaluator.delimited = delimited

if excluded_chunk_types:
evaluator.excluded_chunk_types.extend(excluded_chunk_types)


class LayerBase(object):
def __init__(
Expand Down
39 changes: 23 additions & 16 deletions python/paddle/trainer_config_helpers/evaluators.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,19 +57,21 @@ def impl(method):
return impl


def evaluator_base(input,
type,
label=None,
weight=None,
name=None,
chunk_scheme=None,
num_chunk_types=None,
classification_threshold=None,
positive_label=None,
dict_file=None,
result_file=None,
num_results=None,
delimited=None):
def evaluator_base(
input,
type,
label=None,
weight=None,
name=None,
chunk_scheme=None,
num_chunk_types=None,
classification_threshold=None,
positive_label=None,
dict_file=None,
result_file=None,
num_results=None,
delimited=None,
excluded_chunk_types=None, ):
"""
Evaluator will evaluate the network status while training/testing.
Expand Down Expand Up @@ -127,7 +129,8 @@ def evaluator_base(input,
positive_label=positive_label,
dict_file=dict_file,
result_file=result_file,
delimited=delimited)
delimited=delimited,
excluded_chunk_types=excluded_chunk_types, )


@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
Expand Down Expand Up @@ -330,7 +333,8 @@ def chunk_evaluator(
label,
chunk_scheme,
num_chunk_types,
name=None, ):
name=None,
excluded_chunk_types=None, ):
"""
Chunk evaluator is used to evaluate segment labelling accuracy for a
sequence. It calculates the chunk detection F1 score.
Expand Down Expand Up @@ -376,14 +380,17 @@ def chunk_evaluator(
:param num_chunk_types: number of chunk types other than "other"
:param name: The Evaluator name, it is optional.
:type name: basename|None
:param excluded_chunk_types: chunks of these types are not counted by the evaluator
:type excluded_chunk_types: list of integer|None
"""
evaluator_base(
name=name,
type="chunk",
input=input,
label=label,
chunk_scheme=chunk_scheme,
num_chunk_types=num_chunk_types)
num_chunk_types=num_chunk_types,
excluded_chunk_types=excluded_chunk_types, )


@evaluator(EvaluatorAttribute.FOR_UTILS)
Expand Down

0 comments on commit 2965df5

Please sign in to comment.