Skip to content

Commit

Permalink
Merge pull request #622 from gnieh/fix/emit-row-header-size-mismatch
Browse files Browse the repository at this point in the history
Emit an error if row and header size mismatch
  • Loading branch information
satabin authored Jul 28, 2024
2 parents fff5a0b + 812c16d commit e3e6561
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 2 deletions.
45 changes: 45 additions & 0 deletions csv/shared/src/main/scala/fs2/data/csv/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,10 @@ package object csv {
}

/** Encode a specified type into a CSV prepending the given headers.
  *
  * Deprecated: as stated in the deprecation notice below, this variant emits incorrect data
  * if rows have a different length than the headers. Use `encodeWithGivenHeaders` instead.
  */
@deprecated(
message =
"Emits incorrect data if rows have a different length than headers. Please use `encodeWithGivenHeaders` instead.",
since = "fs2-data 1.11.1")
def encodeGivenHeaders[T]: PartiallyAppliedEncodeGivenHeaders[T] =
new PartiallyAppliedEncodeGivenHeaders[T](dummy = true)

Expand All @@ -217,6 +221,27 @@ package object csv {
}
}

/** Encode a specified type into a CSV prepending the given headers.
  *
  * Returns a partially applied builder: fix the row type `T` here, then call the result with
  * the headers and formatting options to obtain the actual pipe.
  *
  * The resulting pipe goes through `lowlevel.writeWithGivenHeaders`, so a row whose size
  * differs from the number of headers fails the stream instead of being written out.
  */
def encodeWithGivenHeaders[T]: PartiallyAppliedEncodeWithGivenHeaders[T] =
new PartiallyAppliedEncodeWithGivenHeaders[T](dummy = true)

/** Partially applied form of `encodeWithGivenHeaders`: the row type `T` is already fixed, the
  * effect type and header type are chosen when calling `apply`.
  */
@nowarn
class PartiallyAppliedEncodeWithGivenHeaders[T](val dummy: Boolean) extends AnyVal {

  /** Builds the pipe turning values of type `T` into CSV text, with `headers` written first.
    *
    * @param headers   the headers to prepend to the output
    * @param fullRows  selects `lowlevel.toRowStrings` when `true`, `lowlevel.toStrings` otherwise
    *                  (defaults to `false`)
    * @param separator the column separator (defaults to `','`)
    * @param newline   the line terminator (defaults to `"\n"`)
    * @param escape    the escaping mode (defaults to `EscapeMode.Auto`)
    * @return a pipe that encodes each `T` to a row, checks and writes it through
    *         `lowlevel.writeWithGivenHeaders`, then renders the result as strings
    */
  def apply[F[_], Header](headers: NonEmptyList[Header],
                          fullRows: Boolean = false,
                          separator: Char = ',',
                          newline: String = "\n",
                          escape: EscapeMode = EscapeMode.Auto)(implicit
      F: RaiseThrowable[F],
      T: RowEncoder[T],
      H: WriteableHeader[Header]): Pipe[F, T, String] = {
    // Pick the final rendering stage up front; both variants share the same formatting options.
    val render =
      if (!fullRows) lowlevel.toStrings[F](separator, newline, escape)
      else lowlevel.toRowStrings[F](separator, newline, escape)

    lowlevel
      .encode[F, T]
      .andThen(lowlevel.writeWithGivenHeaders(headers))
      .andThen(render)
  }
}

/** Encode a specified type into a CSV that contains the headers determined by encoding the
  * first element. The output is empty if the input is empty.
  */
def encodeUsingFirstHeaders[T]: PartiallyAppliedEncodeUsingFirstHeaders[T] =
new PartiallyAppliedEncodeUsingFirstHeaders(dummy = true)
Expand Down Expand Up @@ -316,10 +341,30 @@ package object csv {
}

/** Writes the given headers as the first output row, followed by the values of every input row.
  *
  * No size check is performed here: each row's values are passed through as they are, whatever
  * their length. That is why this pipe is deprecated in favour of `writeWithGivenHeaders`.
  */
@deprecated(
  message =
    "Emits incorrect data if rows have a different length than headers. Please use `writeWithGivenHeaders` instead.",
  since = "fs2-data 1.11.1")
def writeWithHeaders[F[_], Header](headers: NonEmptyList[Header])(implicit
    H: WriteableHeader[Header]): Pipe[F, Row, NonEmptyList[String]] = { rows =>
  Stream(H(headers)) ++ rows.map(row => row.values)
}

/** Writes the given headers as the first output row, followed by the values of every input row,
  * failing the stream on the first error reported by `attemptWriteWithGivenHeaders`.
  *
  * This is the raising counterpart of `attemptWriteWithGivenHeaders`: the `Left` values it
  * produces are rethrown into the stream's error channel.
  */
def writeWithGivenHeaders[F[_], Header](headers: NonEmptyList[Header])(implicit
    F: RaiseThrowable[F],
    H: WriteableHeader[Header]): Pipe[F, Row, NonEmptyList[String]] = { rows =>
  rows.through(attemptWriteWithGivenHeaders[F, Header](headers)).rethrow
}

/** Writes the given headers as the first output row, followed by one element per input row,
  * reporting problems as values instead of failing the stream.
  *
  * The headers are always emitted as a `Right`. Each subsequent row yields `Right(values)` when
  * its size equals the number of headers, and a `Left` holding a `HeaderSizeError` (built from
  * the expected size, the actual size and the row's line) otherwise. Later rows are still
  * processed after a `Left`.
  */
def attemptWriteWithGivenHeaders[F[_], Header](headers: NonEmptyList[Header])(implicit
    H: WriteableHeader[Header]): Pipe[F, Row, Either[CsvException, NonEmptyList[String]]] = {
  // Computed once per pipe, not once per row.
  val expectedColumns = headers.size
  rows =>
    Stream(Right(H(headers))) ++ rows.map { row =>
      val actualColumns = row.size
      Either.cond(actualColumns == expectedColumns,
                  row.values,
                  new HeaderSizeError(expectedColumns, actualColumns, row.line))
    }
}

/** Emits the values of every input row as they are, without writing any header row. */
def writeWithoutHeaders[F[_]]: Pipe[F, Row, NonEmptyList[String]] = { rows =>
  rows.map(row => row.values)
}
Expand Down
51 changes: 51 additions & 0 deletions csv/shared/src/test/scala/fs2/data/csv/RowGeneratorTest.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright 2024 fs2-data Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package fs2.data.csv

import cats.data.NonEmptyList
import weaver.*

/** Checks how the low-level header-writing pipe reacts to rows whose size does not match the
  * number of headers.
  */
object RowGeneratorTest extends SimpleIOSuite {

  pureTest("Emit error on wrong row size (#621)") {
    val headers = NonEmptyList.of("first", "second", "third")

    // Rows on lines 2 and 3 are respectively one column short and one column long.
    val rows = List(
      Row(NonEmptyList.of("a", "b", "c"), Some(1)),
      Row(NonEmptyList.of("d", "e"), Some(2)),
      Row(NonEmptyList.of("f", "g", "h", "i"), Some(3)),
      Row(NonEmptyList.of("j", "k", "l"), Some(4))
    )

    val emitted =
      fs2.Stream
        .emits(rows)
        .through(lowlevel.attemptWriteWithGivenHeaders(headers))
        .compile
        .toList

    // Headers first, then one element per row: valid rows pass through, invalid ones become
    // errors without interrupting the rows that follow.
    matches(emitted) {
      case List(
            Right(NonEmptyList("first", List("second", "third"))),
            Right(NonEmptyList("a", List("b", "c"))),
            Left(tooShort: HeaderSizeError),
            Left(tooLong: HeaderSizeError),
            Right(NonEmptyList("j", List("k", "l")))
          ) =>
        expect.all(
          tooShort.expectedColumns == 3,
          tooShort.actualColumns == 2,
          tooShort.line == Some(2L),
          tooLong.expectedColumns == 3,
          tooLong.actualColumns == 4,
          tooLong.line == Some(3L)
        )
    }
  }
}
4 changes: 2 additions & 2 deletions site/documentation/csv/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ More high-level pipes are available for the following use cases:
* `decodeGivenHeaders` for CSV parsing that requires headers, but they aren't present in the input
* `decodeUsingHeaders` for CSV parsing that requires headers and they're present in the input
* `encodeWithoutHeaders` for CSV encoding that works entirely without headers (Note: requires `RowEncoder` instead of `CsvRowEncoder`)
* `encodeGivenHeaders` for CSV encoding that works without headers, but they should be added to the output
* `encodeWithGivenHeaders` for CSV encoding that works without headers, but they should be added to the output
* `encodeUsingFirstHeaders` for CSV encoding that works with headers. Uses the headers of the first row for the output.

### Dealing with erroneous files
Expand Down Expand Up @@ -219,7 +219,7 @@ testRows
.string
```

If you want to write headers, use `writeWithHeaders` or, in case you use `CsvRow`, `encodeRowWithFirstHeaders`. For writing non-String headers, you'll need to provide an instance of `WriteableHeader`, a type class analogous to `ParseableHeader`.
If you want to write headers, use `writeWithGivenHeaders` or, in case you use `CsvRow`, `encodeRowWithFirstHeaders`. For writing non-String headers, you'll need to provide an instance of `WriteableHeader`, a type class analogous to `ParseableHeader`.

## The type classes: Decoders and Encoders

Expand Down

0 comments on commit e3e6561

Please sign in to comment.