From a50ba784b61e7e1c534b10ad23b62290ee707203 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Wed, 11 Apr 2012 22:24:48 -0400 Subject: [PATCH] line reading performance tweaks (issue #661) --- base/io.jl | 9 ++++----- src/julia.h | 2 +- src/sys.c | 42 ++++++++++++++++++++++++++++++------------ 3 files changed, 35 insertions(+), 18 deletions(-) diff --git a/base/io.jl b/base/io.jl index 1de6475c70d94..0a4704349a169 100644 --- a/base/io.jl +++ b/base/io.jl @@ -234,10 +234,9 @@ function read{T<:Union(Int8,Uint8,Int16,Uint16,Int32,Uint32,Int64,Uint64,Float32 end end -function readuntil(s::IOStream, delim::Uint8) - a = ccall(:jl_readuntil, Any, (Ptr{Void}, Uint8), s.ios, delim) - # TODO: faster versions that avoid this encoding check - ccall(:jl_array_to_string, Any, (Any,), a)::ByteString +function readuntil(s::IOStream, delim) + # TODO: faster versions that avoid the encoding check + ccall(:jl_readuntil, Any, (Ptr{Void}, Uint8), s.ios, delim) end function readall(s::IOStream) @@ -246,7 +245,7 @@ function readall(s::IOStream) takebuf_string(dest) end -readline(s::IOStream) = readuntil(s, uint8('\n')) +readline(s::IOStream) = readuntil(s, '\n') flush(s::IOStream) = ccall(:ios_flush, Void, (Ptr{Void},), s.ios) diff --git a/src/julia.h b/src/julia.h index 4cbf4e9617a12..bbd35bf838f51 100644 --- a/src/julia.h +++ b/src/julia.h @@ -920,7 +920,7 @@ DLLEXPORT void jl_set_current_output_stream_obj(jl_value_t *v); DLLEXPORT jl_array_t *jl_takebuf_array(ios_t *s); DLLEXPORT jl_value_t *jl_takebuf_string(ios_t *s); -DLLEXPORT jl_array_t *jl_readuntil(ios_t *s, uint8_t delim); +DLLEXPORT jl_value_t *jl_readuntil(ios_t *s, uint8_t delim); static inline void jl_eh_restore_state(jl_savestate_t *ss) { diff --git a/src/sys.c b/src/sys.c index a2f731d218731..54b498b708f64 100644 --- a/src/sys.c +++ b/src/sys.c @@ -153,22 +153,40 @@ jl_value_t *jl_takebuf_string(ios_t *s) return str; } -jl_array_t *jl_readuntil(ios_t *s, uint8_t delim) +jl_value_t *jl_readuntil(ios_t *s, uint8_t delim) { - jl_array_t *a = jl_alloc_array_1d(jl_array_uint8_type, 80); - ios_t dest; - jl_ios_mem(&dest, 0); - ios_setbuf(&dest, a->data, 80, 0); - size_t n = ios_copyuntil(&dest, s, delim); - if (dest.buf != a->data) { - return jl_takebuf_array(&dest); + jl_array_t *a; + // manually inlined common case + char *pd = (char*)memchr(s->buf+s->bpos, delim, s->size - s->bpos); + if (pd) { + size_t n = pd-(s->buf+s->bpos)+1; + a = jl_alloc_array_1d(jl_array_uint8_type, n); + memcpy(jl_array_data(a), s->buf+s->bpos, n); + s->bpos += n; } else { - a->length = n; - a->nrows = n; - ((char*)a->data)[n] = '\0'; + a = jl_alloc_array_1d(jl_array_uint8_type, 80); + ios_t dest; + jl_ios_mem(&dest, 0); + ios_setbuf(&dest, a->data, 80, 0); + size_t n = ios_copyuntil(&dest, s, delim); + if (dest.buf != a->data) { + a = jl_takebuf_array(&dest); + } + else { + a->length = n; + a->nrows = n; + ((char*)a->data)[n] = '\0'; + } } - return a; + JL_GC_PUSH(&a); + jl_struct_type_t* string_type = u8_isvalid(a->data, a->length) == 1 ? // ASCII + jl_ascii_string_type : jl_utf8_string_type; + jl_value_t *str = alloc_2w(); + str->type = (jl_type_t*)string_type; + jl_fieldref(str,0) = (jl_value_t*)a; + JL_GC_POP(); + return str; } // -- syscall utilities --