Replace more STRING by std::string

Remove STRING::add_str_int and STRING::add_str_double, which are now unused.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
stweil · Mar 13, 2021 · 3b07599 · 3b07599
1 parent c9f0da4
commit 3b07599
Show file tree

Hide file tree

Showing 32 changed files with 207 additions and 247 deletions.
diff --git a/src/ccstruct/blamer.cpp b/src/ccstruct/blamer.cpp
@@ -168,19 +168,19 @@ void BlamerBundle::SetupNormTruthWord(const DENORM &denorm) {
 // and the left edge of the right-hand word is word2_left.
 void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug, BlamerBundle *bundle1,
                                BlamerBundle *bundle2) const {
-  STRING debug_str;
+  std::string debug_str;
   // Find truth boxes that correspond to the split in the blobs.
   int b;
   int begin2_truth_index = -1;
   if (incorrect_result_reason_ != IRR_NO_TRUTH && truth_has_char_boxes_) {
     debug_str = "Looking for truth split at";
-    debug_str.add_str_int(" end1_x ", word1_right);
-    debug_str.add_str_int(" begin2_x ", word2_left);
+    debug_str += " end1_x " + std::to_string(word1_right);
+    debug_str += " begin2_x " + std::to_string(word2_left);
     debug_str += "\nnorm_truth_word boxes:\n";
     if (norm_truth_word_.length() > 1) {
-      norm_truth_word_.BlobBox(0).print_to_str(&debug_str);
+      norm_truth_word_.BlobBox(0).print_to_str(debug_str);
       for (b = 1; b < norm_truth_word_.length(); ++b) {
-        norm_truth_word_.BlobBox(b).print_to_str(&debug_str);
+        norm_truth_word_.BlobBox(b).print_to_str(debug_str);
         if ((abs(word1_right - norm_truth_word_.BlobBox(b - 1).right()) < norm_box_tolerance_) &&
             (abs(word2_left - norm_truth_word_.BlobBox(b).left()) < norm_box_tolerance_)) {
           begin2_truth_index = b;
@@ -325,24 +325,24 @@ void BlamerBundle::SetChopperBlame(const WERD_RES *word, bool debug) {
   if (missing_chop || box_index < norm_truth_word_.length()) {
     STRING debug_str;
     if (missing_chop) {
-      debug_str.add_str_int("Detected missing chop (tolerance=", norm_box_tolerance_);
+      debug_str += "Detected missing chop (tolerance=" + std::to_string(norm_box_tolerance_);
       debug_str += ") at Bounding Box=";
       TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
-      curr_blob->bounding_box().print_to_str(&debug_str);
-      debug_str.add_str_int("\nNo chop for truth at x=", truth_x);
+      curr_blob->bounding_box().print_to_str(debug_str);
+      debug_str += "\nNo chop for truth at x=" + std::to_string(truth_x);
     } else {
-      debug_str.add_str_int("Missing chops for last ", norm_truth_word_.length() - box_index);
+      debug_str += "Missing chops for last " + std::to_string(norm_truth_word_.length() - box_index);
       debug_str += " truth box(es)";
     }
     debug_str += "\nMaximally chopped word boxes:\n";
     for (blob_index = 0; blob_index < num_blobs; ++blob_index) {
       TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
-      curr_blob->bounding_box().print_to_str(&debug_str);
+      curr_blob->bounding_box().print_to_str(debug_str);
       debug_str += '\n';
     }
     debug_str += "Truth  bounding  boxes:\n";
     for (box_index = 0; box_index < norm_truth_word_.length(); ++box_index) {
-      norm_truth_word_.BlobBox(box_index).print_to_str(&debug_str);
+      norm_truth_word_.BlobBox(box_index).print_to_str(debug_str);
       debug_str += '\n';
     }
     SetBlame(IRR_CHOPPER, debug_str, word->best_choice, debug);
@@ -413,8 +413,8 @@ void BlamerBundle::SetupCorrectSegmentation(const TWERD *word, bool debug) {
     if (blob_index + 1 < num_blobs)
       next_box_x = word->blobs[blob_index + 1]->bounding_box().right();
     int16_t truth_x = norm_truth_word_.BlobBox(truth_idx).right();
-    debug_str.add_str_int("Box x coord vs. truth: ", curr_box_x);
-    debug_str.add_str_int(" ", truth_x);
+    debug_str += "Box x coord vs. truth: " + std::to_string(curr_box_x);
+    debug_str += " " + std::to_string(truth_x);
     debug_str += "\n";
     if (curr_box_x > (truth_x + norm_box_tolerance_)) {
       break;                                                  // failed to find a matching box
@@ -424,23 +424,23 @@ void BlamerBundle::SetupCorrectSegmentation(const TWERD *word, bool debug) {
       correct_segmentation_cols_.push_back(curr_box_col);
       correct_segmentation_rows_.push_back(next_box_col - 1);
       ++truth_idx;
-      debug_str.add_str_int("col=", curr_box_col);
-      debug_str.add_str_int(" row=", next_box_col - 1);
+      debug_str += "col=" + std::to_string(curr_box_col);
+      debug_str += " row=" + std::to_string(next_box_col - 1);
       debug_str += "\n";
       curr_box_col = next_box_col;
     }
   }
   if (blob_index < num_blobs || // trailing blobs
       correct_segmentation_cols_.size() != norm_truth_word_.length()) {
-    debug_str.add_str_int(
+    debug_str += 
         "Blamer failed to find correct segmentation"
-        " (tolerance=",
-        norm_box_tolerance_);
+        " (tolerance=" +
+        std::to_string(norm_box_tolerance_);
     if (blob_index >= num_blobs)
       debug_str += " blob == nullptr";
     debug_str += ")\n";
-    debug_str.add_str_int(" path length ", correct_segmentation_cols_.size());
-    debug_str.add_str_int(" vs. truth ", norm_truth_word_.length());
+    debug_str += " path length " + std::to_string(correct_segmentation_cols_.size());
+    debug_str += " vs. truth " + std::to_string(norm_truth_word_.length());
     debug_str += "\n";
     SetBlame(IRR_UNKNOWN, debug_str, nullptr, debug);
     correct_segmentation_cols_.clear();
@@ -457,7 +457,7 @@ bool BlamerBundle::GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const {
 #if !defined(DISABLED_LEGACY_ENGINE)
 // Setup ready to guide the segmentation search to the correct segmentation.
 void BlamerBundle::InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings,
-                                    UNICHAR_ID wildcard_id, bool debug, STRING *debug_str,
+                                    UNICHAR_ID wildcard_id, bool debug, std::string &debug_str,
                                     tesseract::LMPainPoints *pain_points, double max_char_wh_ratio,
                                     WERD_RES *word_res) {
   segsearch_is_looking_for_blame_ = true;
@@ -466,19 +466,19 @@ void BlamerBundle::InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *rati
   }
   // Fill pain points for any unclassified blob corresponding to the
   // correct segmentation state.
-  *debug_str += "Correct segmentation:\n";
+  debug_str += "Correct segmentation:\n";
   for (int idx = 0; idx < correct_segmentation_cols_.size(); ++idx) {
-    debug_str->add_str_int("col=", correct_segmentation_cols_[idx]);
-    debug_str->add_str_int(" row=", correct_segmentation_rows_[idx]);
-    *debug_str += "\n";
+    debug_str += "col=" + std::to_string(correct_segmentation_cols_[idx]);
+    debug_str += " row=" + std::to_string(correct_segmentation_rows_[idx]);
+    debug_str += "\n";
     if (!ratings->Classified(correct_segmentation_cols_[idx], correct_segmentation_rows_[idx],
                              wildcard_id) &&
         !pain_points->GeneratePainPoint(
             correct_segmentation_cols_[idx], correct_segmentation_rows_[idx],
             tesseract::LM_PPTYPE_BLAMER, 0.0, false, max_char_wh_ratio, word_res)) {
       segsearch_is_looking_for_blame_ = false;
-      *debug_str += "\nFailed to insert pain point\n";
-      SetBlame(IRR_SEGSEARCH_HEUR, *debug_str, best_choice, debug);
+      debug_str += "\nFailed to insert pain point\n";
+      SetBlame(IRR_SEGSEARCH_HEUR, debug_str, best_choice, debug);
       break;
     }
   } // end for blamer_bundle->correct_segmentation_cols/rows
@@ -491,7 +491,7 @@ bool BlamerBundle::GuidedSegsearchStillGoing() const {
 }
 
 // The segmentation search has ended. Sets the blame appropriately.
-void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, STRING *debug_str) {
+void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, std::string &debug_str) {
   // If we are still looking for blame (i.e. best_choice is incorrect, but a
   // path representing the correct segmentation could be constructed), we can
   // blame segmentation search pain point prioritization if the rating of the
@@ -506,22 +506,22 @@ void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, S
   if (segsearch_is_looking_for_blame_) {
     segsearch_is_looking_for_blame_ = false;
     if (best_choice_is_dict_and_top_choice_) {
-      *debug_str = "Best choice is: incorrect, top choice, dictionary word";
-      *debug_str += " with permuter ";
-      *debug_str += best_choice->permuter_name();
-      SetBlame(IRR_CLASSIFIER, *debug_str, best_choice, debug);
+      debug_str = "Best choice is: incorrect, top choice, dictionary word";
+      debug_str += " with permuter ";
+      debug_str += best_choice->permuter_name();
+      SetBlame(IRR_CLASSIFIER, debug_str, best_choice, debug);
     } else if (best_correctly_segmented_rating_ < best_choice->rating()) {
-      *debug_str += "Correct segmentation state was not explored";
-      SetBlame(IRR_SEGSEARCH_PP, *debug_str, best_choice, debug);
+      debug_str += "Correct segmentation state was not explored";
+      SetBlame(IRR_SEGSEARCH_PP, debug_str, best_choice, debug);
     } else {
       if (best_correctly_segmented_rating_ >= WERD_CHOICE::kBadRating) {
-        *debug_str += "Correct segmentation paths were pruned by LM\n";
+        debug_str += "Correct segmentation paths were pruned by LM\n";
       } else {
-        debug_str->add_str_double("Best correct segmentation rating ",
-                                  best_correctly_segmented_rating_);
-        debug_str->add_str_double(" vs. best choice rating ", best_choice->rating());
+        debug_str += "Best correct segmentation rating " +
+                                  std::to_string(best_correctly_segmented_rating_);
+        debug_str += " vs. best choice rating " + std::to_string(best_choice->rating());
       }
-      SetBlame(IRR_CLASS_LM_TRADEOFF, *debug_str, best_choice, debug);
+      SetBlame(IRR_CLASS_LM_TRADEOFF, debug_str, best_choice, debug);
     }
   }
 }

diff --git a/src/ccstruct/blamer.h b/src/ccstruct/blamer.h
@@ -273,12 +273,12 @@ struct BlamerBundle {
   bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const;
   // Setup ready to guide the segmentation search to the correct segmentation.
   void InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id,
-                        bool debug, STRING *debug_str, tesseract::LMPainPoints *pain_points,
+                        bool debug, std::string &debug_str, tesseract::LMPainPoints *pain_points,
                         double max_char_wh_ratio, WERD_RES *word_res);
   // Returns true if the guided segsearch is in progress.
   bool GuidedSegsearchStillGoing() const;
   // The segmentation search has ended. Sets the blame appropriately.
-  void FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, STRING *debug_str);
+  void FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, std::string &debug_str);
 
   // If the bundle is null or still does not indicate the correct result,
   // fix it and use some backup reason for the blame.

diff --git a/src/ccstruct/boxread.cpp b/src/ccstruct/boxread.cpp
@@ -115,8 +115,8 @@ bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool
     if (texts != nullptr)
       texts->push_back(utf8_str);
     if (box_texts != nullptr) {
-      STRING full_text;
-      MakeBoxFileStr(utf8_str.c_str(), box, target_page, &full_text);
+      std::string full_text;
+      MakeBoxFileStr(utf8_str.c_str(), box, target_page, full_text);
       box_texts->push_back(full_text);
     }
     if (pages != nullptr)
@@ -255,13 +255,13 @@ bool ParseBoxFileStr(const char *boxfile_str, int *page_number, STRING *utf8_str
 }
 
 // Creates a box file string from a unichar string, TBOX and page number.
-void MakeBoxFileStr(const char *unichar_str, const TBOX &box, int page_num, STRING *box_str) {
-  *box_str = unichar_str;
-  box_str->add_str_int(" ", box.left());
-  box_str->add_str_int(" ", box.bottom());
-  box_str->add_str_int(" ", box.right());
-  box_str->add_str_int(" ", box.top());
-  box_str->add_str_int(" ", page_num);
+void MakeBoxFileStr(const char *unichar_str, const TBOX &box, int page_num, std::string &box_str) {
+  box_str = unichar_str;
+  box_str += " " + std::to_string(box.left());
+  box_str += " " + std::to_string(box.bottom());
+  box_str += " " + std::to_string(box.right());
+  box_str += " " + std::to_string(box.top());
+  box_str += " " + std::to_string(page_num);
 }
 
 } // namespace tesseract
diff --git a/src/ccstruct/boxread.h b/src/ccstruct/boxread.h
@@ -80,7 +80,7 @@ bool ParseBoxFileStr(const char *boxfile_str, int *page_number, STRING *utf8_str
 
 // Creates a box file string from a unichar string, TBOX and page number.
 TESS_API
-void MakeBoxFileStr(const char *unichar_str, const TBOX &box, int page_num, STRING *box_str);
+void MakeBoxFileStr(const char *unichar_str, const TBOX &box, int page_num, std::string &box_str);
 
 } // namespace tesseract
 

diff --git a/src/ccstruct/pageres.cpp b/src/ccstruct/pageres.cpp
@@ -479,8 +479,8 @@ void WERD_RES::DebugWordChoices(bool debug, const char *word_to_debug) {
     int index = 0;
     for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
       WERD_CHOICE *choice = it.data();
-      STRING label;
-      label.add_str_int("\nCooked Choice #", index);
+      std::string label;
+      label += "\nCooked Choice #" + std::to_string(index);
       choice->print(label.c_str());
     }
   }

diff --git a/src/ccstruct/rect.cpp b/src/ccstruct/rect.cpp
@@ -167,13 +167,13 @@ void TBOX::plot(                    // paint box
 #endif
 
 // Appends the bounding box as (%d,%d)->(%d,%d) to a string.
-void TBOX::print_to_str(STRING *str) const {
+void TBOX::print_to_str(std::string &str) const {
   // "(%d,%d)->(%d,%d)", left(), bottom(), right(), top()
-  str->add_str_int("(", left());
-  str->add_str_int(",", bottom());
-  str->add_str_int(")->(", right());
-  str->add_str_int(",", top());
-  *str += ')';
+  str += "(" + std::to_string(left());
+  str += "," + std::to_string(bottom());
+  str += ")->(" + std::to_string(right());
+  str += "," + std::to_string(top());
+  str += ')';
 }
 
 // Writes to the given file. Returns false in case of error.

diff --git a/src/ccstruct/rect.h b/src/ccstruct/rect.h
@@ -2,7 +2,6 @@
  * File:        rect.h  (Formerly box.h)
  * Description: Bounding box class definition.
  * Author:      Phil Cheatle
- * Created:     Wed Oct 16 15:18:45 BST 1991
  *
  * (C) Copyright 1991, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -30,11 +29,10 @@
 #include <cmath>     // for std::ceil, std::floor
 #include <cstdint>   // for INT16_MAX
 #include <cstdio>    // for FILE
+#include <string>    // for std::string
 
 namespace tesseract {
 
-class STRING;
-
 class TESS_API TBOX { // bounding box
 public:
   TBOX()
@@ -287,8 +285,8 @@ class TESS_API TBOX { // bounding box
   void print() const { // print
     tprintf("Bounding box=(%d,%d)->(%d,%d)\n", left(), bottom(), right(), top());
   }
-  // Appends the bounding box as (%d,%d)->(%d,%d) to a STRING.
-  void print_to_str(STRING *str) const;
+  // Appends the bounding box as (%d,%d)->(%d,%d) to a string.
+  void print_to_str(std::string &str) const;
 
 #ifndef GRAPHICS_DISABLED
   void plot(                  // use current settings

diff --git a/src/ccutil/strngs.cpp b/src/ccutil/strngs.cpp
@@ -102,27 +102,4 @@ void STRING::split(const char c, std::vector<STRING> *splited) {
   }
 }
 
-void STRING::add_str_int(const char *str, int number) {
-  if (str != nullptr)
-    *this += str;
-  // Allow space for the maximum possible length of int64_t.
-  char num_buffer[kMaxIntSize];
-  snprintf(num_buffer, kMaxIntSize - 1, "%d", number);
-  num_buffer[kMaxIntSize - 1] = '\0';
-  *this += num_buffer;
-}
-
-// Appends the given string and double (as a %.8g) to this.
-void STRING::add_str_double(const char *str, double number) {
-  if (str != nullptr)
-    *this += str;
-  std::stringstream stream;
-  // Use "C" locale (needed for double value).
-  stream.imbue(std::locale::classic());
-  // Use 8 digits for double value.
-  stream.precision(8);
-  stream << number;
-  *this += stream.str().c_str();
-}
-
 } // namespace tesseract
diff --git a/src/ccutil/strngs.h b/src/ccutil/strngs.h
@@ -78,16 +78,6 @@ class STRING : public std::string {
 
   TESS_API
   void split(char c, std::vector<STRING> *splited);
-
-  // Appends the given string and int (as a %d) to this.
-  // += cannot be used for ints as there as a char += operator that would
-  // be ambiguous, and ints usually need a string before or between them
-  // anyway.
-  TESS_API
-  void add_str_int(const char *str, int number);
-  // Appends the given string and double (as a %.8g) to this.
-  TESS_API
-  void add_str_double(const char *str, double number);
 };
 
 } // namespace tesseract.

diff --git a/src/ccutil/unicharcompress.cpp b/src/ccutil/unicharcompress.cpp
@@ -325,17 +325,17 @@ bool UnicharCompress::DeSerialize(TFile *fp) {
 // will encode a single index to a UTF8-string, but Chinese, Japanese, Korean
 // and the Indic scripts will contain a many-to-many mapping.
 // See the class comment above for details.
-STRING UnicharCompress::GetEncodingAsString(const UNICHARSET &unicharset) const {
-  STRING encoding;
+std::string UnicharCompress::GetEncodingAsString(const UNICHARSET &unicharset) const {
+  std::string encoding;
   for (int c = 0; c < encoder_.size(); ++c) {
     const RecodedCharID &code = encoder_[c];
     if (0 < c && c < SPECIAL_UNICHAR_CODES_COUNT && code == encoder_[c - 1]) {
       // Don't show the duplicate entry.
       continue;
     }
-    encoding.add_str_int("", code(0));
+    encoding += std::to_string(code(0));
     for (int i = 1; i < code.length(); ++i) {
-      encoding.add_str_int(",", code(i));
+      encoding += "," + std::to_string(code(i));
     }
     encoding += "\t";
     if (c >= unicharset.size() ||

diff --git a/src/ccutil/unicharcompress.h b/src/ccutil/unicharcompress.h
@@ -196,14 +196,14 @@ class TESS_API UnicharCompress {
 
   bool DeSerialize(TFile *fp);
 
-  // Returns a STRING containing a text file that describes the encoding thus:
+  // Returns a string containing a text file that describes the encoding thus:
   // <index>[,<index>]*<tab><UTF8-str><newline>
   // In words, a comma-separated list of one or more indices, followed by a tab
   // and the UTF-8 string that the code represents per line. Most simple scripts
   // will encode a single index to a UTF8-string, but Chinese, Japanese, Korean
   // and the Indic scripts will contain a many-to-many mapping.
   // See the class comment above for details.
-  STRING GetEncodingAsString(const UNICHARSET &unicharset) const;
+  std::string GetEncodingAsString(const UNICHARSET &unicharset) const;
 
   // Helper decomposes a Hangul unicode to 3 parts, leading, vowel, trailing.
   // Note that the returned values are 0-based indices, NOT unicode Jamo.

diff --git a/src/classify/blobclass.cpp b/src/classify/blobclass.cpp
@@ -80,7 +80,7 @@ void Classify::LearnBlob(const STRING &fontname, TBLOB *blob, const DENORM &cn_d
     tr_file_data_ += "\n";
 
     // write micro-features to file and clean up
-    WriteCharDescription(feature_defs_, CharDesc, &tr_file_data_);
+    WriteCharDescription(feature_defs_, CharDesc, tr_file_data_);
   } else {
     tprintf("Blob learned was invalid!\n");
   }