Skip to content

anystore.io

Generic io helpers

anystore is built on top of fsspec and provides an easy wrapper for reading and writing content from and to arbitrary locations using the io command:

Command-line usage
anystore io -i ./local/foo.txt -o s3://mybucket/other.txt

echo "hello" | anystore io -o sftp://user:password@host:/tmp/world.txt

anystore io -i https://investigativedata.io > index.html
Python usage
from anystore import smart_read, smart_write

data = smart_read("s3://mybucket/data.txt")
smart_write("./local/data", data)

smart_open(uri, mode=DEFAULT_MODE, **kwargs)

IO context similar to Python's built-in open().

Example
from anystore import smart_open

with smart_open("s3://mybucket/foo.csv") as fh:
    return fh.read()

Parameters:

Name Type Description Default
uri Uri

string or path-like key uri to open, e.g. ./local/data.txt or s3://mybucket/foo

required
mode str | None

open mode, default rb for byte reading.

DEFAULT_MODE
**kwargs Any

pass through storage-specific options

{}

Yields:

Type Description
IO

A generic file-handler like context object

Source code in anystore/io.py
@contextlib.contextmanager
def smart_open(
    uri: Uri,
    mode: str | None = DEFAULT_MODE,
    **kwargs: Any,
) -> Generator[IO, None, None]:
    """
    Open `uri` as a file-like object, in the manner of Python's built-in `open()`.

    Example:
        ```python
        from anystore import smart_open

        with smart_open("s3://mybucket/foo.csv") as fh:
            content = fh.read()
        ```

    Args:
        uri: string or path-like key uri to open, e.g. `./local/data.txt` or `s3://mybucket/foo`
        mode: open mode, defaults to `DEFAULT_MODE` (`rb`, byte reading)
        **kwargs: storage-specific options, forwarded unchanged to `SmartHandler`

    Yields:
        The file-like object returned by the handler's `open()`

    Raises:
        DoesNotExist: when a `FileNotFoundError` occurs while opening the
            handle or inside the body of the `with` statement
    """
    smart_handler = SmartHandler(uri, mode=mode, **kwargs)
    try:
        file_handle = smart_handler.open()
        yield file_handle
    except FileNotFoundError as exc:
        # Surface the package's own exception type, keeping the original
        # error attached as the cause.
        raise DoesNotExist from exc
    finally:
        # Executed on both the success and the error path.
        smart_handler.close()

smart_read(uri, mode=DEFAULT_MODE, **kwargs)

Return content for a given file-like key directly.

Parameters:

Name Type Description Default
uri Uri

string or path-like key uri to open, e.g. ./local/data.txt or s3://mybucket/foo

required
mode str | None

open mode, default rb for byte reading.

DEFAULT_MODE
**kwargs Any

pass through storage-specific options

{}

Returns:

Type Description
AnyStr

str or byte content, depending on mode

Source code in anystore/io.py
def smart_read(uri: Uri, mode: str | None = DEFAULT_MODE, **kwargs: Any) -> AnyStr:
    """
    Read the complete content stored at `uri` and return it in one piece.

    Args:
        uri: string or path-like key uri to open, e.g. `./local/data.txt` or `s3://mybucket/foo`
        mode: open mode, defaults to `DEFAULT_MODE` (`rb`, byte reading)
        **kwargs: storage-specific options, forwarded to `smart_open`

    Returns:
        Whatever `read()` on the opened handle returns: `str` or `bytes`,
        depending on `mode`
    """
    with smart_open(uri, mode, **kwargs) as handle:
        content = handle.read()
    return content

smart_stream(uri, mode=DEFAULT_MODE, **kwargs)

Stream content line by line.

Example
import orjson
from anystore import smart_stream

for data in smart_stream("s3://mybucket/data.json"):
    yield orjson.loads(data)

Parameters:

Name Type Description Default
uri Uri

string or path-like key uri to open, e.g. ./local/data.txt or s3://mybucket/foo

required
mode str | None

open mode, default rb for byte reading.

DEFAULT_MODE
**kwargs Any

pass through storage-specific options

{}

Yields:

Type Description
AnyStr

A generator of str or byte content, depending on mode

Source code in anystore/io.py
def smart_stream(
    uri: Uri, mode: str | None = DEFAULT_MODE, **kwargs: Any
) -> Generator[AnyStr, None, None]:
    """
    Stream content line by line.

    The underlying handle is opened via `smart_open` and stays open while the
    generator is being consumed.

    Example:
        ```python
        import orjson
        from anystore import smart_stream

        for data in smart_stream("s3://mybucket/data.json"):
            yield orjson.loads(data)
        ```

    Args:
        uri: string or path-like key uri to open, e.g. `./local/data.txt` or `s3://mybucket/foo`
        mode: open mode, default `rb` for byte reading.
        **kwargs: pass through storage-specific options

    Yields:
        One line per iteration as `str` or `bytes`, depending on `mode`
    """
    with smart_open(uri, mode, **kwargs) as fh:
        # `readline()` returns an empty (falsy) value at end of file, which
        # terminates the loop.
        while line := fh.readline():
            yield line

smart_write(uri, content, mode=DEFAULT_WRITE_MODE, **kwargs)

Write content to a given file-like key directly.

Parameters:

Name Type Description Default
uri Uri

string or path-like key uri to open, e.g. ./local/data.txt or s3://mybucket/foo

required
content bytes | str

str or bytes content to write.

required
mode str | None

open mode, default wb for byte writing.

DEFAULT_WRITE_MODE
**kwargs Any

pass through storage-specific options

{}
Source code in anystore/io.py
def smart_write(
    uri: Uri, content: bytes | str, mode: str | None = DEFAULT_WRITE_MODE, **kwargs: Any
) -> None:
    """
    Write `content` to the location identified by `uri` in a single call.

    Args:
        uri: string or path-like key uri to open, e.g. `./local/data.txt` or `s3://mybucket/foo`
        content: `str` or `bytes` content to write.
        mode: open mode, defaults to `DEFAULT_WRITE_MODE` (`wb`, byte writing)
        **kwargs: storage-specific options, forwarded to `smart_open`
    """
    payload = content
    # The special uri "-" (presumably stdout; confirm against SmartHandler)
    # only ever receives bytes, so text is encoded first.
    if uri == "-" and isinstance(payload, str):
        payload = payload.encode()
    with smart_open(uri, mode, **kwargs) as out:
        out.write(payload)