Skip to content

Commit

Permalink
Add audb.stream() and audb.DatabaseIterator (#448)
Browse files Browse the repository at this point in the history
* Add map argument to audb.load_table()

* Improve docstring example

* Add audb.stream()

* Add more ideas

* Create first working version of stream

* Move to extra file

* Use extra class for parquet

* Add solution for csv files

* Add shuffle support for CSV

* Finish first implementation

* Add string representation

* Adjust docstring

* Add to documentation

* Fix raises section of docs

* Use DatabaseIterator name, shuffle in example

* Fix map

* Simplify docstring example

* Inherit DatabaseIterator from audformat.Database

* Improve example

* Fix type annotations of __next__ + __iter__

* Fix storing in flavor cache folder

* Use __iter__ from audformat.Database

* Revert "Use __iter__ from audformat.Database"

This reverts commit 2bfc800.

* Fix buffering

* Add first tests

* Add tests and fix bugs

* Extend docstring of audb.stream()

* Add section to usage documentation

* DEBUG Windows

* DEBUG Windows

* Try chunks for CSV reader

* Restructure Iterator classes

* Try to fix doctest under Windows

* Set default value of buffer_size to 100_000

* Update docs/load.rst

Co-authored-by: ChristianGeng <christian.c.geng@gmail.com>

* Use **kwargs to simplify code

* Remove __init__ function in inherited Iterators

* Make sure persistent repository is cleaned

* Make DatabaseIterator abstract class

---------

Co-authored-by: ChristianGeng <christian.c.geng@gmail.com>
  • Loading branch information
hagenw and ChristianGeng authored Aug 21, 2024
1 parent 06b029f commit 470091f
Show file tree
Hide file tree
Showing 8 changed files with 1,152 additions and 2 deletions.
2 changes: 2 additions & 0 deletions audb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
from audb.core.load_to import load_to
from audb.core.publish import publish
from audb.core.repository import Repository
from audb.core.stream import DatabaseIterator
from audb.core.stream import stream


__all__ = []
Expand Down
25 changes: 23 additions & 2 deletions audb/core/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from audb.core.utils import lookup_backend


CachedVersions = typing.Sequence[typing.Tuple[audeer.StrictVersion, str, Dependencies],]
CachedVersions = typing.Sequence[typing.Tuple[audeer.StrictVersion, str, Dependencies]]


def _cached_versions(
Expand Down Expand Up @@ -805,7 +805,28 @@ def _misc_tables_used_in_scheme(
if scheme.uses_table:
misc_tables_used_in_scheme.append(scheme.labels)

return list(set(misc_tables_used_in_scheme))
return audeer.unique(misc_tables_used_in_scheme)


def _misc_tables_used_in_table(
    table: audformat.Table,
) -> typing.List[str]:
    r"""Collect the misc tables referenced by the schemes of a table.

    Every column of ``table`` that has a scheme assigned is inspected.
    If the scheme reports ``uses_table``,
    its ``labels`` entry is collected.

    Args:
        table: table object

    Returns:
        unique list of misc tables used in schemes of the table

    """
    used_tables = []
    for _, column in table.columns.items():
        scheme_id = column.scheme_id
        # Columns without a scheme cannot reference a misc table
        if scheme_id is None:
            continue
        # Schemes are looked up on the database the table belongs to
        scheme = table.db.schemes[scheme_id]
        if not scheme.uses_table:
            continue
        used_tables.append(scheme.labels)
    # Several columns may share a scheme, so drop duplicates
    return audeer.unique(used_tables)


def _missing_files(
Expand Down
Loading

0 comments on commit 470091f

Please sign in to comment.