Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix real text too short #152

Merged
merged 8 commits into from
Dec 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/Faker/Generator.php
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
* @method string text($maxNbChars = 200)
*
* @method string realText($maxNbChars = 200, $indexSize = 2)
* @method string realTextBetween($minNbChars = 150, $maxNbChars = 200, $indexSize = 2)
*
* @property string $email
* @property string $safeEmail
Expand Down
49 changes: 49 additions & 0 deletions src/Faker/Provider/Text.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,31 @@ abstract class Text extends Base
*/
public function realText($maxNbChars = 200, $indexSize = 2)
{
return $this->realTextBetween((int) round($maxNbChars * 0.8), $maxNbChars, $indexSize);
}

/**
* Generate a text string by the Markov chain algorithm.
*
* Depending on the $maxNbChars, returns a random valid looking text. The algorithm
* generates a weighted table with the specified number of words as the index and the
* possible following words as the value.
*
* @example 'Alice, swallowing down her flamingo, and began by taking the little golden key'
* @param int $minNbChars Minimum number of characters the text should contain (maximum: 8)
* @param int $maxNbChars Maximum number of characters the text should contain (minimum: 10)
* @param int $indexSize Determines how many words are considered for the generation of the next word.
* The minimum is 1, and it produces a higher level of randomness, although the
* generated text usually doesn't make sense. Higher index sizes (up to 5)
* produce more correct text, at the price of less randomness.
* @return string
*/
public function realTextBetween($minNbChars = 160, $maxNbChars = 200, $indexSize = 2)
{
if ($minNbChars < 1) {
throw new \InvalidArgumentException('minNbChars must be at least 1');
}

if ($maxNbChars < 10) {
throw new \InvalidArgumentException('maxNbChars must be at least 10');
}
Expand All @@ -40,7 +65,31 @@ public function realText($maxNbChars = 200, $indexSize = 2)
throw new \InvalidArgumentException('indexSize must be at most 5');
}

if ($minNbChars >= $maxNbChars) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldnt we set a minimum here as well? And check for valid values since an -10 can also be passed

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've now added a check that $minNbChars is not smaller than 1.

It doesn't make much sense to set $minNbChars to something much smaller than $maxNbChars, but it also doesn't prevent the algorithm from working as expected (producing real text that is between min and max chars long), so I wouldn't prohibit doing it.

throw new \InvalidArgumentException('minNbChars must be smaller than maxNbChars');
}

$words = $this->getConsecutiveWords($indexSize);
$iterations = 0;
do {
$iterations++;
if ($iterations >= 100) {
throw new \OverflowException(sprintf('Maximum retries of %d reached without finding a valid real text', $iterations));
}

$result = $this->generateText($maxNbChars, $words);
} while (static::strlen($result) <= $minNbChars);

return $result;
}

/**
* @param int $maxNbChars
* @param array $words
* @return string
*/
protected function generateText($maxNbChars, $words)
{
$result = [];
$resultLength = 0;
// take a random starting point
Expand Down
59 changes: 55 additions & 4 deletions test/Faker/Provider/TextTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,28 @@ final class TextTest extends TestCase
* [200]
* [500]
*/
public function testTextMaxLength($length)
public function testRealTextMaxLength($length)
{
self::assertLessThan($length, strlen($this->faker->realText($length)));
}

public function testTextMaxIndex()
/**
* @testWith [10]
* [20]
* [50]
* [70]
* [90]
* [120]
* [150]
* [200]
* [500]
*/
public function testRealTextMinLength($length)
{
self::assertGreaterThanOrEqual($length * 0.8, strlen($this->faker->realText($length)));
}

public function testRealTextMaxIndex()
{
$this->expectException(\InvalidArgumentException::class);

Expand All @@ -32,7 +48,7 @@ public function testTextMaxIndex()
self::fail('The index should be less than or equal to 5.');
}

public function testTextMinIndex()
public function testRealTextMinIndex()
{
$this->expectException(\InvalidArgumentException::class);

Expand All @@ -41,7 +57,7 @@ public function testTextMinIndex()
self::fail('The index should be greater than or equal to 1.');
}

public function testTextMinLength()
public function testRealTextMinNbChars()
{
$this->expectException(\InvalidArgumentException::class);

Expand All @@ -50,6 +66,41 @@ public function testTextMinLength()
self::fail('The text should be at least 10 characters.');
}

/**
* @testWith [1, 10]
* [5, 10]
* [8, 10]
* [18, 20]
* [45, 50]
* [180, 200]
* [1950, 2000]
*/
public function testRealTextBetweenTextLength($min, $max)
{
$strlen = strlen($this->faker->realTextBetween($min, $max));

self::assertGreaterThan($min, $strlen);
self::assertLessThan($max, $strlen);
}

public function testRealTextBetweenMinNbChars()
{
$this->expectException(\InvalidArgumentException::class);

$this->faker->realTextBetween(25, 20);

self::fail('minNbChars should be smaller than maxNbChars');
}

public function testRealTextBetweenMinNbCharsGreaterThan1()
{
$this->expectException(\InvalidArgumentException::class);

$this->faker->realTextBetween(0, 30);

self::fail('minNbChars must be bigger than 0');
}

protected function getProviders(): iterable
{
yield new Text($this->faker);
Expand Down