Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Increase SFTP chunk size to increase the SFTP throughput in both directions #664

Open
wants to merge 10 commits into
base: devel
Choose a base branch
from
1 change: 1 addition & 0 deletions docs/changelog-fragments/638.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed large sftp reads and proper overriding existing files -- by :user:`Jakuje`.
1 change: 1 addition & 0 deletions docs/changelog-fragments/664.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improved performance of SFTP transfers by using larger transfer chunks -- by :user:`Jakuje`.
4 changes: 4 additions & 0 deletions src/pylibsshext/includes/libssh.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ cdef extern from "libssh/libssh.h" nogil:
pass
ctypedef ssh_session_struct* ssh_session

cdef struct ssh_string_struct:
pass
ctypedef ssh_string_struct* ssh_string

cdef struct ssh_key_struct:
pass
ctypedef ssh_key_struct* ssh_key
Expand Down
32 changes: 31 additions & 1 deletion src/pylibsshext/includes/sftp.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
#
from posix.types cimport mode_t

from pylibsshext.includes.libssh cimport ssh_channel, ssh_session
from libc cimport stdint

from pylibsshext.includes.libssh cimport ssh_channel, ssh_session, ssh_string


cdef extern from "libssh/sftp.h" nogil:
Expand All @@ -30,6 +32,31 @@ cdef extern from "libssh/sftp.h" nogil:
pass
ctypedef sftp_file_struct * sftp_file

struct sftp_attributes_struct:
char *name
char *longname
stdint.uint32_t flags
stdint.uint8_t type
stdint.uint64_t size
stdint.uint32_t uid
stdint.uint32_t gid
char *owner
char *group
stdint.uint32_t permissions
stdint.uint64_t atime64
stdint.uint32_t atime
stdint.uint32_t atime_nseconds
stdint.uint64_t createtime
stdint.uint32_t createtime_nseconds
stdint.uint64_t mtime64
stdint.uint32_t mtime
stdint.uint32_t mtime_nseconds
ssh_string acl
stdint.uint32_t extended_count
ssh_string extended_type
ssh_string extended_data
ctypedef sftp_attributes_struct * sftp_attributes

cdef int SSH_FX_OK
cdef int SSH_FX_EOF
cdef int SSH_FX_NO_SUCH_FILE
Expand All @@ -55,5 +82,8 @@ cdef extern from "libssh/sftp.h" nogil:
ssize_t sftp_read(sftp_file file, const void *buf, size_t count)
int sftp_get_error(sftp_session sftp)

sftp_attributes sftp_stat(sftp_session session, const char *path)


cdef extern from "sys/stat.h" nogil:
cdef int S_IRWXU
65 changes: 43 additions & 22 deletions src/pylibsshext/sftp.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,15 @@
from posix.fcntl cimport O_CREAT, O_RDONLY, O_TRUNC, O_WRONLY

from cpython.bytes cimport PyBytes_AS_STRING
from cpython.mem cimport PyMem_Free, PyMem_Malloc

from pylibsshext.errors cimport LibsshSFTPException
from pylibsshext.session cimport get_libssh_session


SFTP_MAX_CHUNK = 32_768 # 32kB


MSG_MAP = {
sftp.SSH_FX_OK: "No error",
sftp.SSH_FX_EOF: "End-of-file encountered",
Expand Down Expand Up @@ -63,7 +67,7 @@ cdef class SFTP:
rf = sftp.sftp_open(self._libssh_sftp_session, remote_file_b, O_WRONLY | O_CREAT | O_TRUNC, sftp.S_IRWXU)
if rf is NULL:
raise LibsshSFTPException("Opening remote file [%s] for write failed with error [%s]" % (remote_file, self._get_sftp_error_str()))
buffer = f.read(1024)
buffer = f.read(SFTP_MAX_CHUNK)

while buffer != b"":
length = len(buffer)
Expand All @@ -76,38 +80,55 @@ cdef class SFTP:
self._get_sftp_error_str(),
)
)
buffer = f.read(1024)
buffer = f.read(SFTP_MAX_CHUNK)
sftp.sftp_close(rf)

def get(self, remote_file, local_file):
cdef sftp.sftp_file rf
cdef char read_buffer[1024]
cdef char *read_buffer = NULL
cdef sftp.sftp_attributes attrs

remote_file_b = remote_file
if isinstance(remote_file_b, unicode):
remote_file_b = remote_file.encode("utf-8")

attrs = sftp.sftp_stat(self._libssh_sftp_session, remote_file_b)
if attrs is NULL:
raise LibsshSFTPException("Failed to stat the remote file [%s]. Error: [%s]"
% (remote_file, self._get_sftp_error_str()))
file_size = attrs.size

rf = sftp.sftp_open(self._libssh_sftp_session, remote_file_b, O_RDONLY, sftp.S_IRWXU)
if rf is NULL:
raise LibsshSFTPException("Opening remote file [%s] for read failed with error [%s]" % (remote_file, self._get_sftp_error_str()))

while True:
file_data = sftp.sftp_read(rf, <void *>read_buffer, sizeof(char) * 1024)
if file_data == 0:
break
elif file_data < 0:
sftp.sftp_close(rf)
raise LibsshSFTPException("Reading data from remote file [%s] failed with error [%s]"
% (remote_file, self._get_sftp_error_str()))

with open(local_file, 'wb+') as f:
bytes_written = f.write(read_buffer[:file_data])
if bytes_written and file_data != bytes_written:
sftp.sftp_close(rf)
raise LibsshSFTPException("Number of bytes [%s] read from remote file [%s]"
" does not match number of bytes [%s] written to local file [%s]"
" due to error [%s]"
% (file_data, remote_file, bytes_written, local_file, self._get_sftp_error_str()))
raise LibsshSFTPException("Opening remote file [%s] for read failed with error [%s]"
% (remote_file, self._get_sftp_error_str()))

try:
with open(local_file, 'wb') as f:
buffer_size = min(SFTP_MAX_CHUNK, file_size)
read_buffer = <char *>PyMem_Malloc(buffer_size)
if read_buffer is NULL:
raise LibsshSFTPException("Memory allocation error")

while True:
file_data = sftp.sftp_read(rf, <void *>read_buffer, sizeof(char) * buffer_size)
if file_data == 0:
break
elif file_data < 0:
sftp.sftp_close(rf)
raise LibsshSFTPException("Reading data from remote file [%s] failed with error [%s]"
% (remote_file, self._get_sftp_error_str()))

bytes_written = f.write(read_buffer[:file_data])
if bytes_written and file_data != bytes_written:
sftp.sftp_close(rf)
raise LibsshSFTPException("Number of bytes [%s] read from remote file [%s]"
" does not match number of bytes [%s] written to local file [%s]"
" due to error [%s]"
% (file_data, remote_file, bytes_written, local_file, self._get_sftp_error_str()))
finally:
if read_buffer is not NULL:
PyMem_Free(read_buffer)
sftp.sftp_close(rf)

def close(self):
Expand Down
64 changes: 64 additions & 0 deletions tests/unit/sftp_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

"""Tests suite for sftp."""

import random
import string
import uuid

import pytest
Expand Down Expand Up @@ -48,6 +50,22 @@ def dst_path(file_paths_pair):
return path


@pytest.fixture
def other_payload():
"""Generate a binary test payload."""
uuid_name = uuid.uuid4()
return 'Original content: {name!s}'.format(name=uuid_name).encode()


@pytest.fixture
def dst_exists_path(file_paths_pair, other_payload):
"""Return a data destination path."""
path = file_paths_pair[1]
path.write_bytes(other_payload)
assert path.exists()
return path


def test_make_sftp(sftp_session):
"""Smoke-test SFTP instance creation."""
assert sftp_session
Expand All @@ -63,3 +81,49 @@ def test_get(dst_path, src_path, sftp_session, transmit_payload):
"""Check that SFTP file download works."""
sftp_session.get(str(src_path), str(dst_path))
assert dst_path.read_bytes() == transmit_payload


def test_get_existing(dst_exists_path, src_path, sftp_session, transmit_payload):
"""Check that SFTP file download works when target file exists."""
sftp_session.get(str(src_path), str(dst_exists_path))
assert dst_exists_path.read_bytes() == transmit_payload


def test_put_existing(dst_exists_path, src_path, sftp_session, transmit_payload):
"""Check that SFTP file download works when target file exists."""
sftp_session.put(str(src_path), str(dst_exists_path))
assert dst_exists_path.read_bytes() == transmit_payload


@pytest.fixture
def large_payload():
"""Generate a large 32769 byte (32kB + 1B) test payload."""
random_char_kilobyte = [ord(random.choice(string.printable)) for _ in range(1024)]
full_bytes_number = 32
a_32kB_chunk = bytes(random_char_kilobyte * full_bytes_number)
the_last_byte = random.choice(random_char_kilobyte).to_bytes(length=1, byteorder='big')
return a_32kB_chunk + the_last_byte


@pytest.fixture
def src_path_large(tmp_path, large_payload):
"""Return a remote path to a 32769 byte-sized file.

The pylibssh chunk size is 32769 so the test needs a file that would
execute at least two loops.
"""
path = tmp_path / 'large.txt'
path.write_bytes(large_payload)
return path


def test_put_large(dst_path, src_path_large, sftp_session, large_payload):
"""Check that SFTP can upload large file."""
sftp_session.put(str(src_path_large), str(dst_path))
assert dst_path.read_bytes() == large_payload


def test_get_large(dst_path, src_path_large, sftp_session, large_payload):
"""Check that SFTP can download large file."""
sftp_session.get(str(src_path_large), str(dst_path))
assert dst_path.read_bytes() == large_payload
Loading