Skip to content

Commit

Permalink
Merge pull request #50 from level23/rowCount
Browse files Browse the repository at this point in the history
added rowCount() method
  • Loading branch information
jasper-ter-veen authored Sep 28, 2023
2 parents 4b30a26 + d7e8763 commit 306b712
Show file tree
Hide file tree
Showing 4 changed files with 203 additions and 29 deletions.
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ for more information.
- [structure](#metadata-structure)
- [timeBoundary](#metadata-timeboundary)
- [dataSources](#metadata-datasources)
- [rowCount](#metadata-rowcount)
- [Reindex/compact data/kill](#reindex--compact-data--kill)
- [compact()](#compact)
- [reindex()](#reindex)
Expand Down Expand Up @@ -3644,6 +3645,25 @@ foreach($dataSources as $dataSource) {
}
```

#### `metadata()->rowCount()`

Retrieve the number of rows for the given dataSource and interval.

The `rowCount()` method has the following parameters:

| **Type** | **Optional/Required** | **Argument** | **Example** | **Description** |
|---------------------------|-----------------------|---------------|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| string | Required | `$dataSource` | "wikipedia" | The name of the dataSource (table) for which you want to count the rows. |
| string/int/DateTime | Required | `$start` | "now - 24 hours" | The start date to retrieve the row count for. See [interval()](#interval) for all allowed formats. |
| string/int/DateTime/null | Optional | `$stop` | "now" | The stop date to retrieve the row count for. See [interval()](#interval) for all allowed formats. When a string containing a slash is given as start date, the stop date can be left out. |

Example:

```php
// Retrieve the total records for the past week.
$numRows = $client->metadata()->rowCount("wikipedia", "now - 1 week", "now");
```


## Reindex / compact data / kill

Expand Down
66 changes: 60 additions & 6 deletions src/Metadata/MetadataBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

use DateTime;
use Exception;
use DateTimeInterface;
use InvalidArgumentException;
use Level23\Druid\DruidClient;
use Level23\Druid\Types\TimeBound;
Expand Down Expand Up @@ -288,21 +289,74 @@ public function interval(string $dataSource, string $interval): array
* )
* )
*
* @param string $dataSource
* @param string $interval
* @param string $dataSource
* @param \DateTimeInterface|int|string $start
* @param \DateTimeInterface|int|string|null $stop
*
* @return array<int,array<string,string>>
* @throws \GuzzleHttp\Exception\GuzzleException
* @throws \Level23\Druid\Exceptions\QueryResponseException
* @throws \Exception
*/
protected function getColumnsForInterval(
    string $dataSource,
    DateTimeInterface|int|string $start,
    DateTimeInterface|int|string|null $stop = null
): array {
    // $stop is declared explicitly nullable: relying on the "= null" default to
    // make the type nullable is deprecated as of PHP 8.4. A null $stop is passed
    // through to interval() unchanged.
    $response = $this->client->query($dataSource)
        ->interval($start, $stop)
        ->segmentMetadata();

    $rows = $response->data();

    // Only the first row of the response is inspected; without a "columns"
    // entry there is nothing to report.
    if (!isset($rows[0]['columns'])) {
        return [];
    }

    /** @var array<string,array<string,array<string,string>>> $row */
    $row = $rows[0];

    // Flatten the name => details map into a list, storing the column name
    // under the "field" key of each entry.
    $columns = [];
    foreach ($row['columns'] as $key => $value) {
        $columns[] = array_merge($value, ['field' => $key]);
    }

    return $columns;
}

/**
* Return the total number of rows for the given interval
*
* @param string $dataSource The name of the dataSource where you want to count the
* rows for
* @param \DateTimeInterface|int|string $start The start of the interval.
* @param \DateTimeInterface|int|string|null $stop The end of the interval, or null when it was given as a
* "date/date" interval in the $start parameter.
*
* @return int
* @throws \GuzzleHttp\Exception\GuzzleException
* @throws \Level23\Druid\Exceptions\QueryResponseException
* @throws \Exception
*/
public function rowCount(
    string $dataSource,
    DateTimeInterface|int|string $start,
    DateTimeInterface|int|string|null $stop = null
): int {
    // $stop is declared explicitly nullable (implicit nullability through the
    // "= null" default is deprecated as of PHP 8.4); null is handed to
    // interval() unchanged.
    $response = $this->client->query($dataSource)
        ->interval($start, $stop)
        ->segmentMetadata();

    // Add up the "numRows" value of every row in the response. Rows without
    // that key contribute nothing to the total.
    $totalRows = 0;
    foreach ($response->data() as $row) {
        if (isset($row['numRows'])) {
            $totalRows += intval($row['numRows']);
        }
    }

    return $totalRows;
}

/**
Expand Down
15 changes: 1 addition & 14 deletions src/Responses/SegmentMetadataQueryResponse.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,6 @@ class SegmentMetadataQueryResponse extends QueryResponse
*/
public function data(): array
{
    // Hand back the response rows untouched. No column flattening happens
    // here, so per-row keys such as "columns" and "numRows" stay available
    // to the caller.
    return $this->response;
}
}
131 changes: 122 additions & 9 deletions tests/Metadata/MetadataBuilderTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -282,14 +282,10 @@ public function testStructure(
}

/**
* @testWith [[{"columns": {"__time": {"type":"LONG"}}}]]
*
* @param array<string,array<string,array<string,string>>> $segmentMetadataResponse
*
* @throws \GuzzleHttp\Exception\GuzzleException
* @throws \Level23\Druid\Exceptions\QueryResponseException
*/
public function testGetColumnsForInterval(array $segmentMetadataResponse): void
public function testGetColumnsForInterval(): void
{
$dataSource = 'myDataSource';
$interval = '2019-08-19T13:00:00.000Z/2019-08-19T14:00:00.000Z';
Expand All @@ -304,10 +300,62 @@ public function testGetColumnsForInterval(array $segmentMetadataResponse): void

$queryBuilder->shouldReceive('interval')
->once()
->with($interval)
->with($interval, null)
->andReturn($queryBuilder);

$responseObj = new SegmentMetadataQueryResponse($segmentMetadataResponse);
$rawResponse = [
[
'id' => 'myDataSource_2019-08-19T13:00:00.000Z/2019-08-19T14:00:00.000Z_2019-08-19T00:00:03.958Z',
'intervals' => ['2019-08-19T13:00:00.000Z/2019-08-19T14:00:00.000Z',],
'columns' =>
[
'__time' =>
[
'typeSignature' => 'LONG',
'type' => 'LONG',
'hasMultipleValues' => false,
'hasNulls' => false,
'size' => 0,
'cardinality' => null,
'minValue' => null,
'maxValue' => null,
'errorMessage' => null,
],
'iso' =>
[
'typeSignature' => 'STRING',
'type' => 'STRING',
'hasMultipleValues' => false,
'hasNulls' => false,
'size' => 0,
'cardinality' => 42,
'minValue' => 'be',
'maxValue' => 'zw',
'errorMessage' => null,
],
'counter' =>
[
'typeSignature' => 'LONG',
'type' => 'LONG',
'hasMultipleValues' => false,
'hasNulls' => false,
'size' => 0,
'cardinality' => null,
'minValue' => null,
'maxValue' => null,
'errorMessage' => null,
],
],
'size' => 0,
'numRows' => 151,
'aggregators' => null,
'timestampSpec' => null,
'queryGranularity' => null,
'rollup' => null,
],
];

$responseObj = new SegmentMetadataQueryResponse($rawResponse);

$queryBuilder->shouldReceive('segmentMetadata')
->once()
Expand All @@ -317,7 +365,73 @@ public function testGetColumnsForInterval(array $segmentMetadataResponse): void
->shouldAllowMockingProtectedMethods()
->getColumnsForInterval($dataSource, $interval);

$this->assertEquals($responseObj->data(), $response);
$expected = [];
array_walk($rawResponse[0]['columns'], function ($value, $key) use (&$expected) {
$expected[] = array_merge($value, ['field' => $key]);
});
$this->assertEquals($expected, $response);
}


/**
 * rowCount() must add up the "numRows" value of every row returned by the
 * segmentMetadata query and return the total as an integer.
 *
 * @throws \GuzzleHttp\Exception\GuzzleException
 * @throws \Level23\Druid\Exceptions\QueryResponseException
 */
public function testRowCount(): void
{
    $dataSource = 'myDataSource';
    $interval   = '2019-08-19T13:00:00.000Z/2019-08-19T14:00:00.000Z';

    $metadataBuilder = Mockery::mock(MetadataBuilder::class, [$this->client]);
    $metadataBuilder->makePartial();
    $queryBuilder = Mockery::mock(QueryBuilder::class, [$this->client, 'myDataSource']);

    $this->client->shouldReceive('query')
        ->with($dataSource)
        ->once()
        ->andReturn($queryBuilder);

    // Only $start is supplied, so the builder has to forward null as the stop value.
    $queryBuilder->shouldReceive('interval')
        ->once()
        ->with($interval, null)
        ->andReturn($queryBuilder);

    // Two response rows, each with its own numRows, so the test fails if only
    // the first row is counted.
    $rawResponse = [
        [
            'id'               => 'myDataSource_2019-08-19T13:00:00.000Z/2019-08-19T14:00:00.000Z_2019-08-19T00:00:03.958Z',
            'intervals'        => ['2019-08-19T13:00:00.000Z/2019-08-19T14:00:00.000Z',],
            'columns'          => [],
            'size'             => 0,
            'numRows'          => 151,
            'aggregators'      => null,
            'timestampSpec'    => null,
            'queryGranularity' => null,
            'rollup'           => null,
        ],
        [
            'id'               => 'myDataSource_2019-08-19T13:00:00.000Z/2019-08-19T15:00:00.000Z_2019-08-19T00:00:03.958Z',
            'intervals'        => ['2019-08-19T13:00:00.000Z/2019-08-19T14:00:00.000Z',],
            'columns'          => [],
            'size'             => 0,
            'numRows'          => 645,
            'aggregators'      => null,
            'timestampSpec'    => null,
            'queryGranularity' => null,
            'rollup'           => null,
        ],
    ];

    $responseObj = new SegmentMetadataQueryResponse($rawResponse);

    $queryBuilder->shouldReceive('segmentMetadata')
        ->once()
        ->andReturn($responseObj);

    // rowCount() is public, so it can be called on the partial mock directly.
    $response = $metadataBuilder->rowCount($dataSource, $interval);

    // assertSame also checks the type, pinning the declared int return value.
    $this->assertSame(645 + 151, $response);
}

/**
Expand Down Expand Up @@ -591,5 +705,4 @@ public function testDataSources(): void

$this->assertEquals(['wikipedia', 'clicks'], $response);
}

}

0 comments on commit 306b712

Please sign in to comment.