From 5b89dd67bcda46786c63dd920ec9e845fe8538f3 Mon Sep 17 00:00:00 2001 From: Daniel Schmelz Date: Sat, 19 Dec 2020 00:01:28 +0100 Subject: [PATCH 1/8] fix real text too short Signed-off-by: Daniel Schmelz --- src/Faker/Provider/Text.php | 20 ++++++++++++++++++++ test/Faker/Provider/TextTest.php | 18 +++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/Faker/Provider/Text.php b/src/Faker/Provider/Text.php index 6de01239ce..8018de61cd 100644 --- a/src/Faker/Provider/Text.php +++ b/src/Faker/Provider/Text.php @@ -40,6 +40,26 @@ public function realText($maxNbChars = 200, $indexSize = 2) throw new \InvalidArgumentException('indexSize must be at most 5'); } + $iterations = 0; + do { + $iterations ++; + if ($iterations >= 100) { + throw new \OverflowException(sprintf('Maximum retries of %d reached without finding a valid real text', $iterations)); + } + + $result = $this->generateText($indexSize, $maxNbChars); + } while (strlen($result) < $maxNbChars * 0.8); + + return static::appendEnd($result); + } + + /** + * @param int $indexSize + * @param int $maxNbChars + * @return string + */ + protected function generateText(int $indexSize, int $maxNbChars) + { $words = $this->getConsecutiveWords($indexSize); $result = []; $resultLength = 0; diff --git a/test/Faker/Provider/TextTest.php b/test/Faker/Provider/TextTest.php index da78dfdeed..e2fa997a77 100644 --- a/test/Faker/Provider/TextTest.php +++ b/test/Faker/Provider/TextTest.php @@ -23,6 +23,22 @@ public function testTextMaxLength($length) self::assertLessThan($length, strlen($this->faker->realText($length))); } + /** + * @testWith [10] + * [20] + * [50] + * [70] + * [90] + * [120] + * [150] + * [200] + * [500] + */ + public function testTextMinLength($length) + { + self::assertGreaterThanOrEqual($length * 0.8, strlen($this->faker->realText($length))); + } + public function testTextMaxIndex() { $this->expectException(\InvalidArgumentException::class); @@ -41,7 +57,7 @@ public function 
testTextMinIndex() self::fail('The index should be greater than or equal to 1.'); } - public function testTextMinLength() + public function testTextMinNbChars() { $this->expectException(\InvalidArgumentException::class); From b62953738240fc1d07b9d6d4659caaaeb16e8011 Mon Sep 17 00:00:00 2001 From: Daniel Schmelz Date: Sat, 19 Dec 2020 03:56:05 +0100 Subject: [PATCH 2/8] fix real text too short --- src/Faker/Provider/Text.php | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Faker/Provider/Text.php b/src/Faker/Provider/Text.php index 8018de61cd..86b5034a9d 100644 --- a/src/Faker/Provider/Text.php +++ b/src/Faker/Provider/Text.php @@ -40,27 +40,30 @@ public function realText($maxNbChars = 200, $indexSize = 2) throw new \InvalidArgumentException('indexSize must be at most 5'); } + $words = $this->getConsecutiveWords($indexSize); + $iterations = 0; + $minNbChars = $maxNbChars * 0.8; + do { $iterations ++; if ($iterations >= 100) { throw new \OverflowException(sprintf('Maximum retries of %d reached without finding a valid real text', $iterations)); } - $result = $this->generateText($indexSize, $maxNbChars); - } while (strlen($result) < $maxNbChars * 0.8); + $result = $this->generateText($maxNbChars, $words); + } while (strlen($result) < $minNbChars); return static::appendEnd($result); } /** - * @param int $indexSize - * @param int $maxNbChars + * @param int $maxNbChars + * @param array $words * @return string */ - protected function generateText(int $indexSize, int $maxNbChars) + protected function generateText($maxNbChars, $words) { - $words = $this->getConsecutiveWords($indexSize); $result = []; $resultLength = 0; // take a random starting point From 8658f4b3b2ef73368e42be4d4e574cbb1a3fcbaf Mon Sep 17 00:00:00 2001 From: Daniel Schmelz Date: Sat, 19 Dec 2020 08:05:31 +0100 Subject: [PATCH 3/8] add realTextBetween --- src/Faker/Generator.php | 1 + src/Faker/Provider/Text.php | 32 +++++++++++++++++++++++----- 
test/Faker/Provider/TextTest.php | 36 +++++++++++++++++++++++++++----- 3 files changed, 59 insertions(+), 10 deletions(-) diff --git a/src/Faker/Generator.php b/src/Faker/Generator.php index 84bdd5da08..70f22dc2fc 100644 --- a/src/Faker/Generator.php +++ b/src/Faker/Generator.php @@ -73,6 +73,7 @@ * @method string text($maxNbChars = 200) * * @method string realText($maxNbChars = 200, $indexSize = 2) + * @method string realTextBetween($minNbChars = 150, $maxNbChars = 200, $indexSize = 2) * * @property string $email * @property string $safeEmail diff --git a/src/Faker/Provider/Text.php b/src/Faker/Provider/Text.php index 86b5034a9d..82b413de55 100644 --- a/src/Faker/Provider/Text.php +++ b/src/Faker/Provider/Text.php @@ -27,6 +27,27 @@ abstract class Text extends Base * @return string */ public function realText($maxNbChars = 200, $indexSize = 2) + { + return $this->realTextBetween(round($maxNbChars * 0.8), $maxNbChars, $indexSize); + } + + /** + * Generate a text string by the Markov chain algorithm. + * + * Depending on the $maxNbChars, returns a random valid looking text. The algorithm + * generates a weighted table with the specified number of words as the index and the + * possible following words as the value. + * + * @example 'Alice, swallowing down her flamingo, and began by taking the little golden key' + * @param int $minNbChars Minimum number of characters the text should contain (must be smaller than $maxNbChars) + * @param int $maxNbChars Maximum number of characters the text should contain (minimum: 10) + * @param int $indexSize Determines how many words are considered for the generation of the next word. + * The minimum is 1, and it produces a higher level of randomness, although the + * generated text usually doesn't make sense. Higher index sizes (up to 5) + * produce more correct text, at the price of less randomness. 
+ * @return string + */ + public function realTextBetween($minNbChars = 150, $maxNbChars = 200, $indexSize = 2) { if ($maxNbChars < 10) { throw new \InvalidArgumentException('maxNbChars must be at least 10'); @@ -40,19 +61,20 @@ public function realText($maxNbChars = 200, $indexSize = 2) throw new \InvalidArgumentException('indexSize must be at most 5'); } - $words = $this->getConsecutiveWords($indexSize); + if ($minNbChars >= $maxNbChars) { + throw new \InvalidArgumentException('minNbChars must be smaller than maxNbChars'); + } + $words = $this->getConsecutiveWords($indexSize); $iterations = 0; - $minNbChars = $maxNbChars * 0.8; - do { - $iterations ++; + $iterations++; if ($iterations >= 100) { throw new \OverflowException(sprintf('Maximum retries of %d reached without finding a valid real text', $iterations)); } $result = $this->generateText($maxNbChars, $words); - } while (strlen($result) < $minNbChars); + } while (strlen($result) <= $minNbChars); return static::appendEnd($result); } diff --git a/test/Faker/Provider/TextTest.php b/test/Faker/Provider/TextTest.php index e2fa997a77..fb9fe7be57 100644 --- a/test/Faker/Provider/TextTest.php +++ b/test/Faker/Provider/TextTest.php @@ -18,7 +18,7 @@ final class TextTest extends TestCase * [200] * [500] */ - public function testTextMaxLength($length) + public function testRealTextMaxLength($length) { self::assertLessThan($length, strlen($this->faker->realText($length))); } @@ -34,12 +34,12 @@ public function testTextMaxLength($length) * [200] * [500] */ - public function testTextMinLength($length) + public function testRealTextMinLength($length) { self::assertGreaterThanOrEqual($length * 0.8, strlen($this->faker->realText($length))); } - public function testTextMaxIndex() + public function testRealTextMaxIndex() { $this->expectException(\InvalidArgumentException::class); @@ -48,7 +48,7 @@ public function testTextMaxIndex() self::fail('The index should be less than or equal to 5.'); } - public function 
testTextMinIndex() + public function testRealTextMinIndex() { $this->expectException(\InvalidArgumentException::class); @@ -57,7 +57,7 @@ public function testTextMinIndex() self::fail('The index should be greater than or equal to 1.'); } - public function testTextMinNbChars() + public function testRealTextMinNbChars() { $this->expectException(\InvalidArgumentException::class); @@ -66,6 +66,32 @@ public function testTextMinNbChars() self::fail('The text should be at least 10 characters.'); } + /** + * @testWith [0, 10] + * [5, 10] + * [8, 10] + * [8, 20] + * [10, 50] + * [150, 200] + * [1700, 2000] + */ + public function testRealTextBetweenTextLength($min, $max) + { + $strlen = strlen($this->faker->realTextBetween($min, $max)); + + self::assertGreaterThan($min, $strlen); + self::assertLessThan($max, $strlen); + } + + public function testRealTextBetweenMinNbChars() + { + $this->expectException(\InvalidArgumentException::class); + + $this->faker->realTextBetween(9, 9); + + self::fail('minNbChars should be smaller than maxNbChars'); + } + protected function getProviders(): iterable { yield new Text($this->faker); From 0f470742ba554c258e7082792033fd44657527b9 Mon Sep 17 00:00:00 2001 From: Daniel Schmelz Date: Sat, 19 Dec 2020 08:08:30 +0100 Subject: [PATCH 4/8] add realTextBetween --- src/Faker/Provider/Text.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Faker/Provider/Text.php b/src/Faker/Provider/Text.php index 82b413de55..80eed170f3 100644 --- a/src/Faker/Provider/Text.php +++ b/src/Faker/Provider/Text.php @@ -28,7 +28,7 @@ abstract class Text extends Base */ public function realText($maxNbChars = 200, $indexSize = 2) { - return $this->realTextBetween(round($maxNbChars * 0.8), $maxNbChars, $indexSize); + return $this->realTextBetween((int) round($maxNbChars * 0.8), $maxNbChars, $indexSize); } /** From 9b03f9d1b5f782782de134341be92d5a587585fd Mon Sep 17 00:00:00 2001 From: Daniel Schmelz Date: Sat, 19 Dec 2020 08:25:49 +0100 Subject: [PATCH 
5/8] add realTextBetween --- src/Faker/Provider/Text.php | 2 +- test/Faker/Provider/TextTest.php | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Faker/Provider/Text.php b/src/Faker/Provider/Text.php index 80eed170f3..f842a83ea6 100644 --- a/src/Faker/Provider/Text.php +++ b/src/Faker/Provider/Text.php @@ -47,7 +47,7 @@ public function realText($maxNbChars = 200, $indexSize = 2) * produce more correct text, at the price of less randomness. * @return string */ - public function realTextBetween($minNbChars = 150, $maxNbChars = 200, $indexSize = 2) + public function realTextBetween($minNbChars = 160, $maxNbChars = 200, $indexSize = 2) { if ($maxNbChars < 10) { throw new \InvalidArgumentException('maxNbChars must be at least 10'); diff --git a/test/Faker/Provider/TextTest.php b/test/Faker/Provider/TextTest.php index fb9fe7be57..587b210db0 100644 --- a/test/Faker/Provider/TextTest.php +++ b/test/Faker/Provider/TextTest.php @@ -70,10 +70,10 @@ public function testRealTextMinNbChars() * @testWith [0, 10] * [5, 10] * [8, 10] - * [8, 20] - * [10, 50] - * [150, 200] - * [1700, 2000] + * [18, 20] + * [45, 50] + * [180, 200] + * [1950, 2000] */ public function testRealTextBetweenTextLength($min, $max) { From 8a5ef508f1f57ff97b707e9055402300519bbccd Mon Sep 17 00:00:00 2001 From: Daniel Schmelz Date: Sat, 19 Dec 2020 09:06:39 +0100 Subject: [PATCH 6/8] add realTextBetween --- src/Faker/Provider/Text.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Faker/Provider/Text.php b/src/Faker/Provider/Text.php index f842a83ea6..a2c79bf148 100644 --- a/src/Faker/Provider/Text.php +++ b/src/Faker/Provider/Text.php @@ -76,7 +76,7 @@ public function realTextBetween($minNbChars = 160, $maxNbChars = 200, $indexSize $result = $this->generateText($maxNbChars, $words); } while (strlen($result) <= $minNbChars); - return static::appendEnd($result); + return $result; } /** From 0c64f444a5a87dbeeaa6afa98c83eed62ce0ee80 Mon Sep 17 00:00:00 2001 
From: Daniel Schmelz Date: Sat, 19 Dec 2020 11:37:51 +0100 Subject: [PATCH 7/8] add realTextBetween --- src/Faker/Provider/Text.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Faker/Provider/Text.php b/src/Faker/Provider/Text.php index a2c79bf148..1331315d41 100644 --- a/src/Faker/Provider/Text.php +++ b/src/Faker/Provider/Text.php @@ -74,7 +74,7 @@ public function realTextBetween($minNbChars = 160, $maxNbChars = 200, $indexSize } $result = $this->generateText($maxNbChars, $words); - } while (strlen($result) <= $minNbChars); + } while (static::strlen($result) <= $minNbChars); return $result; } From dc366a1de873d3945f2d78150deb5be405bb42a5 Mon Sep 17 00:00:00 2001 From: Daniel Schmelz Date: Sat, 19 Dec 2020 17:41:01 +0100 Subject: [PATCH 8/8] edge case minNbChars < 1 --- src/Faker/Provider/Text.php | 4 ++++ test/Faker/Provider/TextTest.php | 13 +++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/Faker/Provider/Text.php b/src/Faker/Provider/Text.php index 1331315d41..3c09559199 100644 --- a/src/Faker/Provider/Text.php +++ b/src/Faker/Provider/Text.php @@ -49,6 +49,10 @@ public function realText($maxNbChars = 200, $indexSize = 2) */ public function realTextBetween($minNbChars = 160, $maxNbChars = 200, $indexSize = 2) { + if ($minNbChars < 1) { + throw new \InvalidArgumentException('minNbChars must be at least 1'); + } + if ($maxNbChars < 10) { throw new \InvalidArgumentException('maxNbChars must be at least 10'); } diff --git a/test/Faker/Provider/TextTest.php b/test/Faker/Provider/TextTest.php index 587b210db0..72aed53964 100644 --- a/test/Faker/Provider/TextTest.php +++ b/test/Faker/Provider/TextTest.php @@ -67,7 +67,7 @@ public function testRealTextMinNbChars() } /** - * @testWith [0, 10] + * @testWith [1, 10] * [5, 10] * [8, 10] * [18, 20] @@ -87,11 +87,20 @@ public function testRealTextBetweenMinNbChars() { $this->expectException(\InvalidArgumentException::class); - $this->faker->realTextBetween(9, 9); + 
$this->faker->realTextBetween(25, 20); self::fail('minNbChars should be smaller than maxNbChars'); } + public function testRealTextBetweenMinNbCharsGreaterThan1() + { + $this->expectException(\InvalidArgumentException::class); + + $this->faker->realTextBetween(0, 30); + + self::fail('minNbChars must be bigger than 0'); + } + protected function getProviders(): iterable { yield new Text($this->faker);