Skip to content

Commit

Permalink
Replace more STRING by std::string
Browse files Browse the repository at this point in the history
Remove STRING::add_str_int and STRING::add_str_double which are now unused.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
  • Loading branch information
stweil committed Mar 13, 2021
1 parent c9f0da4 commit 3b07599
Show file tree
Hide file tree
Showing 32 changed files with 207 additions and 247 deletions.
78 changes: 39 additions & 39 deletions src/ccstruct/blamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,19 +168,19 @@ void BlamerBundle::SetupNormTruthWord(const DENORM &denorm) {
// and the left edge of the right-hand word is word2_left.
void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug, BlamerBundle *bundle1,
BlamerBundle *bundle2) const {
STRING debug_str;
std::string debug_str;
// Find truth boxes that correspond to the split in the blobs.
int b;
int begin2_truth_index = -1;
if (incorrect_result_reason_ != IRR_NO_TRUTH && truth_has_char_boxes_) {
debug_str = "Looking for truth split at";
debug_str.add_str_int(" end1_x ", word1_right);
debug_str.add_str_int(" begin2_x ", word2_left);
debug_str += " end1_x " + std::to_string(word1_right);
debug_str += " begin2_x " + std::to_string(word2_left);
debug_str += "\nnorm_truth_word boxes:\n";
if (norm_truth_word_.length() > 1) {
norm_truth_word_.BlobBox(0).print_to_str(&debug_str);
norm_truth_word_.BlobBox(0).print_to_str(debug_str);
for (b = 1; b < norm_truth_word_.length(); ++b) {
norm_truth_word_.BlobBox(b).print_to_str(&debug_str);
norm_truth_word_.BlobBox(b).print_to_str(debug_str);
if ((abs(word1_right - norm_truth_word_.BlobBox(b - 1).right()) < norm_box_tolerance_) &&
(abs(word2_left - norm_truth_word_.BlobBox(b).left()) < norm_box_tolerance_)) {
begin2_truth_index = b;
Expand Down Expand Up @@ -325,24 +325,24 @@ void BlamerBundle::SetChopperBlame(const WERD_RES *word, bool debug) {
if (missing_chop || box_index < norm_truth_word_.length()) {
STRING debug_str;
if (missing_chop) {
debug_str.add_str_int("Detected missing chop (tolerance=", norm_box_tolerance_);
debug_str += "Detected missing chop (tolerance=" + std::to_string(norm_box_tolerance_);
debug_str += ") at Bounding Box=";
TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
curr_blob->bounding_box().print_to_str(&debug_str);
debug_str.add_str_int("\nNo chop for truth at x=", truth_x);
curr_blob->bounding_box().print_to_str(debug_str);
debug_str += "\nNo chop for truth at x=" + std::to_string(truth_x);
} else {
debug_str.add_str_int("Missing chops for last ", norm_truth_word_.length() - box_index);
debug_str += "Missing chops for last " + std::to_string(norm_truth_word_.length() - box_index);
debug_str += " truth box(es)";
}
debug_str += "\nMaximally chopped word boxes:\n";
for (blob_index = 0; blob_index < num_blobs; ++blob_index) {
TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
curr_blob->bounding_box().print_to_str(&debug_str);
curr_blob->bounding_box().print_to_str(debug_str);
debug_str += '\n';
}
debug_str += "Truth bounding boxes:\n";
for (box_index = 0; box_index < norm_truth_word_.length(); ++box_index) {
norm_truth_word_.BlobBox(box_index).print_to_str(&debug_str);
norm_truth_word_.BlobBox(box_index).print_to_str(debug_str);
debug_str += '\n';
}
SetBlame(IRR_CHOPPER, debug_str, word->best_choice, debug);
Expand Down Expand Up @@ -413,8 +413,8 @@ void BlamerBundle::SetupCorrectSegmentation(const TWERD *word, bool debug) {
if (blob_index + 1 < num_blobs)
next_box_x = word->blobs[blob_index + 1]->bounding_box().right();
int16_t truth_x = norm_truth_word_.BlobBox(truth_idx).right();
debug_str.add_str_int("Box x coord vs. truth: ", curr_box_x);
debug_str.add_str_int(" ", truth_x);
debug_str += "Box x coord vs. truth: " + std::to_string(curr_box_x);
debug_str += " " + std::to_string(truth_x);
debug_str += "\n";
if (curr_box_x > (truth_x + norm_box_tolerance_)) {
break; // failed to find a matching box
Expand All @@ -424,23 +424,23 @@ void BlamerBundle::SetupCorrectSegmentation(const TWERD *word, bool debug) {
correct_segmentation_cols_.push_back(curr_box_col);
correct_segmentation_rows_.push_back(next_box_col - 1);
++truth_idx;
debug_str.add_str_int("col=", curr_box_col);
debug_str.add_str_int(" row=", next_box_col - 1);
debug_str += "col=" + std::to_string(curr_box_col);
debug_str += " row=" + std::to_string(next_box_col - 1);
debug_str += "\n";
curr_box_col = next_box_col;
}
}
if (blob_index < num_blobs || // trailing blobs
correct_segmentation_cols_.size() != norm_truth_word_.length()) {
debug_str.add_str_int(
debug_str +=
"Blamer failed to find correct segmentation"
" (tolerance=",
norm_box_tolerance_);
" (tolerance=" +
std::to_string(norm_box_tolerance_);
if (blob_index >= num_blobs)
debug_str += " blob == nullptr";
debug_str += ")\n";
debug_str.add_str_int(" path length ", correct_segmentation_cols_.size());
debug_str.add_str_int(" vs. truth ", norm_truth_word_.length());
debug_str += " path length " + std::to_string(correct_segmentation_cols_.size());
debug_str += " vs. truth " + std::to_string(norm_truth_word_.length());
debug_str += "\n";
SetBlame(IRR_UNKNOWN, debug_str, nullptr, debug);
correct_segmentation_cols_.clear();
Expand All @@ -457,7 +457,7 @@ bool BlamerBundle::GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const {
#if !defined(DISABLED_LEGACY_ENGINE)
// Setup ready to guide the segmentation search to the correct segmentation.
void BlamerBundle::InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings,
UNICHAR_ID wildcard_id, bool debug, STRING *debug_str,
UNICHAR_ID wildcard_id, bool debug, std::string &debug_str,
tesseract::LMPainPoints *pain_points, double max_char_wh_ratio,
WERD_RES *word_res) {
segsearch_is_looking_for_blame_ = true;
Expand All @@ -466,19 +466,19 @@ void BlamerBundle::InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *rati
}
// Fill pain points for any unclassified blob corresponding to the
// correct segmentation state.
*debug_str += "Correct segmentation:\n";
debug_str += "Correct segmentation:\n";
for (int idx = 0; idx < correct_segmentation_cols_.size(); ++idx) {
debug_str->add_str_int("col=", correct_segmentation_cols_[idx]);
debug_str->add_str_int(" row=", correct_segmentation_rows_[idx]);
*debug_str += "\n";
debug_str += "col=" + std::to_string(correct_segmentation_cols_[idx]);
debug_str += " row=" + std::to_string(correct_segmentation_rows_[idx]);
debug_str += "\n";
if (!ratings->Classified(correct_segmentation_cols_[idx], correct_segmentation_rows_[idx],
wildcard_id) &&
!pain_points->GeneratePainPoint(
correct_segmentation_cols_[idx], correct_segmentation_rows_[idx],
tesseract::LM_PPTYPE_BLAMER, 0.0, false, max_char_wh_ratio, word_res)) {
segsearch_is_looking_for_blame_ = false;
*debug_str += "\nFailed to insert pain point\n";
SetBlame(IRR_SEGSEARCH_HEUR, *debug_str, best_choice, debug);
debug_str += "\nFailed to insert pain point\n";
SetBlame(IRR_SEGSEARCH_HEUR, debug_str, best_choice, debug);
break;
}
} // end for blamer_bundle->correct_segmentation_cols/rows
Expand All @@ -491,7 +491,7 @@ bool BlamerBundle::GuidedSegsearchStillGoing() const {
}

// The segmentation search has ended. Sets the blame appropriately.
void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, STRING *debug_str) {
void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, std::string &debug_str) {
// If we are still looking for blame (i.e. best_choice is incorrect, but a
// path representing the correct segmentation could be constructed), we can
// blame segmentation search pain point prioritization if the rating of the
Expand All @@ -506,22 +506,22 @@ void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, S
if (segsearch_is_looking_for_blame_) {
segsearch_is_looking_for_blame_ = false;
if (best_choice_is_dict_and_top_choice_) {
*debug_str = "Best choice is: incorrect, top choice, dictionary word";
*debug_str += " with permuter ";
*debug_str += best_choice->permuter_name();
SetBlame(IRR_CLASSIFIER, *debug_str, best_choice, debug);
debug_str = "Best choice is: incorrect, top choice, dictionary word";
debug_str += " with permuter ";
debug_str += best_choice->permuter_name();
SetBlame(IRR_CLASSIFIER, debug_str, best_choice, debug);
} else if (best_correctly_segmented_rating_ < best_choice->rating()) {
*debug_str += "Correct segmentation state was not explored";
SetBlame(IRR_SEGSEARCH_PP, *debug_str, best_choice, debug);
debug_str += "Correct segmentation state was not explored";
SetBlame(IRR_SEGSEARCH_PP, debug_str, best_choice, debug);
} else {
if (best_correctly_segmented_rating_ >= WERD_CHOICE::kBadRating) {
*debug_str += "Correct segmentation paths were pruned by LM\n";
debug_str += "Correct segmentation paths were pruned by LM\n";
} else {
debug_str->add_str_double("Best correct segmentation rating ",
best_correctly_segmented_rating_);
debug_str->add_str_double(" vs. best choice rating ", best_choice->rating());
debug_str += "Best correct segmentation rating " +
std::to_string(best_correctly_segmented_rating_);
debug_str += " vs. best choice rating " + std::to_string(best_choice->rating());
}
SetBlame(IRR_CLASS_LM_TRADEOFF, *debug_str, best_choice, debug);
SetBlame(IRR_CLASS_LM_TRADEOFF, debug_str, best_choice, debug);
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/ccstruct/blamer.h
Original file line number Diff line number Diff line change
Expand Up @@ -273,12 +273,12 @@ struct BlamerBundle {
bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const;
// Setup ready to guide the segmentation search to the correct segmentation.
void InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id,
bool debug, STRING *debug_str, tesseract::LMPainPoints *pain_points,
bool debug, std::string &debug_str, tesseract::LMPainPoints *pain_points,
double max_char_wh_ratio, WERD_RES *word_res);
// Returns true if the guided segsearch is in progress.
bool GuidedSegsearchStillGoing() const;
// The segmentation search has ended. Sets the blame appropriately.
void FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, STRING *debug_str);
void FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, std::string &debug_str);

// If the bundle is null or still does not indicate the correct result,
// fix it and use some backup reason for the blame.
Expand Down
18 changes: 9 additions & 9 deletions src/ccstruct/boxread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool
if (texts != nullptr)
texts->push_back(utf8_str);
if (box_texts != nullptr) {
STRING full_text;
MakeBoxFileStr(utf8_str.c_str(), box, target_page, &full_text);
std::string full_text;
MakeBoxFileStr(utf8_str.c_str(), box, target_page, full_text);
box_texts->push_back(full_text);
}
if (pages != nullptr)
Expand Down Expand Up @@ -255,13 +255,13 @@ bool ParseBoxFileStr(const char *boxfile_str, int *page_number, STRING *utf8_str
}

// Creates a box file string from a unichar string, TBOX and page number.
void MakeBoxFileStr(const char *unichar_str, const TBOX &box, int page_num, STRING *box_str) {
*box_str = unichar_str;
box_str->add_str_int(" ", box.left());
box_str->add_str_int(" ", box.bottom());
box_str->add_str_int(" ", box.right());
box_str->add_str_int(" ", box.top());
box_str->add_str_int(" ", page_num);
void MakeBoxFileStr(const char *unichar_str, const TBOX &box, int page_num, std::string &box_str) {
box_str = unichar_str;
box_str += " " + std::to_string(box.left());
box_str += " " + std::to_string(box.bottom());
box_str += " " + std::to_string(box.right());
box_str += " " + std::to_string(box.top());
box_str += " " + std::to_string(page_num);
}

} // namespace tesseract
2 changes: 1 addition & 1 deletion src/ccstruct/boxread.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ bool ParseBoxFileStr(const char *boxfile_str, int *page_number, STRING *utf8_str

// Creates a box file string from a unichar string, TBOX and page number.
TESS_API
void MakeBoxFileStr(const char *unichar_str, const TBOX &box, int page_num, STRING *box_str);
void MakeBoxFileStr(const char *unichar_str, const TBOX &box, int page_num, std::string &box_str);

} // namespace tesseract

Expand Down
4 changes: 2 additions & 2 deletions src/ccstruct/pageres.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -479,8 +479,8 @@ void WERD_RES::DebugWordChoices(bool debug, const char *word_to_debug) {
int index = 0;
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
WERD_CHOICE *choice = it.data();
STRING label;
label.add_str_int("\nCooked Choice #", index);
std::string label;
label += "\nCooked Choice #" + std::to_string(index);
choice->print(label.c_str());
}
}
Expand Down
12 changes: 6 additions & 6 deletions src/ccstruct/rect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,13 +167,13 @@ void TBOX::plot( // paint box
#endif

// Appends the bounding box as (%d,%d)->(%d,%d) to a string.
void TBOX::print_to_str(STRING *str) const {
void TBOX::print_to_str(std::string &str) const {
// "(%d,%d)->(%d,%d)", left(), bottom(), right(), top()
str->add_str_int("(", left());
str->add_str_int(",", bottom());
str->add_str_int(")->(", right());
str->add_str_int(",", top());
*str += ')';
str += "(" + std::to_string(left());
str += "," + std::to_string(bottom());
str += ")->(" + std::to_string(right());
str += "," + std::to_string(top());
str += ')';
}

// Writes to the given file. Returns false in case of error.
Expand Down
8 changes: 3 additions & 5 deletions src/ccstruct/rect.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
* File: rect.h (Formerly box.h)
* Description: Bounding box class definition.
* Author: Phil Cheatle
* Created: Wed Oct 16 15:18:45 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -30,11 +29,10 @@
#include <cmath> // for std::ceil, std::floor
#include <cstdint> // for INT16_MAX
#include <cstdio> // for FILE
#include <string> // for std::string

namespace tesseract {

class STRING;

class TESS_API TBOX { // bounding box
public:
TBOX()
Expand Down Expand Up @@ -287,8 +285,8 @@ class TESS_API TBOX { // bounding box
void print() const { // print
tprintf("Bounding box=(%d,%d)->(%d,%d)\n", left(), bottom(), right(), top());
}
// Appends the bounding box as (%d,%d)->(%d,%d) to a STRING.
void print_to_str(STRING *str) const;
// Appends the bounding box as (%d,%d)->(%d,%d) to a string.
void print_to_str(std::string &str) const;

#ifndef GRAPHICS_DISABLED
void plot( // use current settings
Expand Down
23 changes: 0 additions & 23 deletions src/ccutil/strngs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,27 +102,4 @@ void STRING::split(const char c, std::vector<STRING> *splited) {
}
}

void STRING::add_str_int(const char *str, int number) {
if (str != nullptr)
*this += str;
// Allow space for the maximum possible length of int64_t.
char num_buffer[kMaxIntSize];
snprintf(num_buffer, kMaxIntSize - 1, "%d", number);
num_buffer[kMaxIntSize - 1] = '\0';
*this += num_buffer;
}

// Appends |str| (skipped when it is nullptr) followed by |number| (as a
// %.8g) to this string.
void STRING::add_str_double(const char *str, double number) {
  if (str != nullptr) {
    *this += str;
  }
  std::stringstream formatter;
  // Format with the classic "C" locale (needed for double value).
  formatter.imbue(std::locale::classic());
  // Keep 8 digits of precision for the double value.
  formatter.precision(8);
  formatter << number;
  const std::string text = formatter.str();
  *this += text.c_str();
}

} // namespace tesseract
10 changes: 0 additions & 10 deletions src/ccutil/strngs.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,6 @@ class STRING : public std::string {

TESS_API
void split(char c, std::vector<STRING> *splited);

// Appends the given string and int (as a %d) to this.
// += cannot be used for ints as there is a char += operator that would
// be ambiguous, and ints usually need a string before or between them
// anyway.
TESS_API
void add_str_int(const char *str, int number);
// Appends the given string and double (as a %.8g) to this.
TESS_API
void add_str_double(const char *str, double number);
};

} // namespace tesseract.
Expand Down
8 changes: 4 additions & 4 deletions src/ccutil/unicharcompress.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,17 +325,17 @@ bool UnicharCompress::DeSerialize(TFile *fp) {
// will encode a single index to a UTF8-string, but Chinese, Japanese, Korean
// and the Indic scripts will contain a many-to-many mapping.
// See the class comment above for details.
STRING UnicharCompress::GetEncodingAsString(const UNICHARSET &unicharset) const {
STRING encoding;
std::string UnicharCompress::GetEncodingAsString(const UNICHARSET &unicharset) const {
std::string encoding;
for (int c = 0; c < encoder_.size(); ++c) {
const RecodedCharID &code = encoder_[c];
if (0 < c && c < SPECIAL_UNICHAR_CODES_COUNT && code == encoder_[c - 1]) {
// Don't show the duplicate entry.
continue;
}
encoding.add_str_int("", code(0));
encoding += std::to_string(code(0));
for (int i = 1; i < code.length(); ++i) {
encoding.add_str_int(",", code(i));
encoding += "," + std::to_string(code(i));
}
encoding += "\t";
if (c >= unicharset.size() ||
Expand Down
4 changes: 2 additions & 2 deletions src/ccutil/unicharcompress.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,14 +196,14 @@ class TESS_API UnicharCompress {

bool DeSerialize(TFile *fp);

// Returns a STRING containing a text file that describes the encoding thus:
// Returns a string containing a text file that describes the encoding thus:
// <index>[,<index>]*<tab><UTF8-str><newline>
// In words, a comma-separated list of one or more indices, followed by a tab
// and the UTF-8 string that the code represents per line. Most simple scripts
// will encode a single index to a UTF8-string, but Chinese, Japanese, Korean
// and the Indic scripts will contain a many-to-many mapping.
// See the class comment above for details.
STRING GetEncodingAsString(const UNICHARSET &unicharset) const;
std::string GetEncodingAsString(const UNICHARSET &unicharset) const;

// Helper decomposes a Hangul unicode to 3 parts, leading, vowel, trailing.
// Note that the returned values are 0-based indices, NOT unicode Jamo.
Expand Down
2 changes: 1 addition & 1 deletion src/classify/blobclass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ void Classify::LearnBlob(const STRING &fontname, TBLOB *blob, const DENORM &cn_d
tr_file_data_ += "\n";

// write micro-features to file and clean up
WriteCharDescription(feature_defs_, CharDesc, &tr_file_data_);
WriteCharDescription(feature_defs_, CharDesc, tr_file_data_);
} else {
tprintf("Blob learned was invalid!\n");
}
Expand Down
Loading

0 comments on commit 3b07599

Please sign in to comment.