Skip to content

Commit

Permalink
Merge pull request #141 from feyst/master
Browse files Browse the repository at this point in the history
Add functionality to parse source as specified encoding and get result as specified encoding
  • Loading branch information
sabas authored Oct 16, 2024
2 parents 8e0197a + eeadc31 commit 9f2c96e
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 11 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Continuous-integration workflow: installs Composer dependencies and runs the PHPUnit suite.
name: CI

# Triggers: every push, plus pull requests on the "opened" and "synchronize" activity types.
on:
push:
pull_request:
types:
- opened
- synchronize

jobs:
build-test:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
# The cache key combines the runner OS with the hash of every composer.lock,
# so a changed lock file produces a fresh cache entry.
- name: Cache Composer dependencies
uses: actions/cache@v3
with:
# NOTE(review): presumably the download cache used by the php-actions/composer step below — confirm against that action's docs.
path: /tmp/composer-cache
key: ${{ runner.os }}-${{ hashFiles('**/composer.lock') }}
- uses: php-actions/composer@v6
# Runs the PHPUnit binary found under vendor/.
- name: PHPUnit tests
run: ./vendor/bin/phpunit

3 changes: 0 additions & 3 deletions phpunit.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@
<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/10.3/phpunit.xsd"
bootstrap="tests/bootstrap.php"
cacheDirectory=".phpunit.cache"
executionOrder="depends,defects"
requireCoverageMetadata="false"
beStrictAboutCoverageMetadata="false"
beStrictAboutOutputDuringTests="true"
failOnRisky="true"
failOnWarning="true">
Expand Down
2 changes: 1 addition & 1 deletion src/EDI/Interpreter.php
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,7 @@ private function processXmlSegment(\SimpleXMLElement $elm, array &$message, int
} else {
if (! $segmentVisited && isset($elm['required'])) {
$segmentVisited = true;
if (\call_user_func($this->comparisonFunction, $message[$segmentIdx+1], $elm)) {
if (isset($message[$segmentIdx+1]) && \call_user_func($this->comparisonFunction, $message[$segmentIdx+1], $elm)) {
$errors[] = [
'text' => $this->messageTextConf['SPURIOUSSEGMENT'].($this->patchFiles ? ' (skipped)' : ''),
'position' => $segmentIdx,
Expand Down
37 changes: 33 additions & 4 deletions src/EDI/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ class Parser
*/
private $messageNumber;

private ?string $sourceEncoding = null;

/**
* @var array<string,string>
*/
Expand Down Expand Up @@ -141,8 +143,13 @@ class Parser
*/
public function parse(): self
{
$rawSegments = $this->getRawSegments();
if ($this->sourceEncoding && isset(self::$charsets[$this->syntaxID]) && self::$charsets[$this->syntaxID] !== $this->sourceEncoding) {
$rawSegments = $this->convertEncoding($this->rawSegments, $this->sourceEncoding, self::$charsets[$this->syntaxID]);
}

$i = 0;
foreach ($this->getRawSegments() as $line) {
foreach ($rawSegments as $line) {
$i++;

// Null byte and carriage return removal. (CR+LF)
Expand Down Expand Up @@ -310,16 +317,38 @@ public function setStrict(bool $strict):void
$this->strict = $strict;
}

/**
 * Declare the character encoding of the loaded source.
 *
 * This only stores the value; parse() reads it to decide whether the raw
 * segments have to be converted before they are processed.
 *
 * @param string $sourceEncoding Encoding name, e.g. 'UTF-8'.
 */
public function setSourceEncoding(string $sourceEncoding): void
{
$this->sourceEncoding = $sourceEncoding;
}

/**
* Get parsed lines/segments.
*/
public function get(): array
/**
 * Get parsed lines/segments, optionally converted to another encoding.
 *
 * Runs parse() first when nothing has been parsed yet.
 *
 * @param string|null $encoding Target encoding for every string in the result,
 *                              or null to return the segments exactly as parsed.
 *
 * @return array Parsed segments; returned unconverted when the current encoding
 *               of the data cannot be determined.
 */
public function get(?string $encoding = null): array
{
    if (empty($this->parsedfile)) {
        $this->parse();
    }

    if (null === $encoding) {
        return $this->parsedfile;
    }

    // parse() only converts to the syntax charset when that charset is known,
    // so without one the data is still in the declared source encoding.
    $currentEncoding = self::$charsets[$this->syntaxID] ?? $this->sourceEncoding;
    if (null === $currentEncoding || '' === $currentEncoding) {
        // Nothing is known about the current encoding: converting would be a guess.
        return $this->parsedfile;
    }

    return $this->convertEncoding($this->parsedfile, $currentEncoding, $encoding);
}

/**
 * Recursively convert every string inside $data from one encoding to another.
 *
 * Arrays are walked depth-first and keep their keys; values that are neither
 * arrays nor strings are returned untouched.
 *
 * iconv (with transliteration) is tried first. When it is unavailable or
 * reports a failure, mb_convert_encoding is used instead. If neither can
 * convert the string, the original string is kept so that a failed conversion
 * never replaces segment data with `false`.
 *
 * NOTE(review): on PHP 8 mb_convert_encoding raises \ValueError for an
 * unrecognised encoding name; that error is deliberately not swallowed here.
 *
 * @param mixed  $data Value to convert (string, nested array, or anything else).
 * @param string $from Encoding the strings are currently in.
 * @param string $to   Encoding to convert the strings to.
 *
 * @return mixed Same shape as $data with its strings converted.
 */
private function convertEncoding($data, string $from, string $to)
{
    if (is_array($data)) {
        foreach ($data as $key => $value) {
            $data[$key] = $this->convertEncoding($value, $from, $to);
        }

        return $data;
    }

    if (!is_string($data) || '' === $data) {
        return $data;
    }

    if (function_exists('iconv')) {
        $converted = iconv($from, $to . '//TRANSLIT', $data);
        // iconv signals failure (illegal sequence, unsupported //TRANSLIT) with false.
        if (false !== $converted) {
            return $converted;
        }
    }

    if (function_exists('mb_convert_encoding')) {
        $converted = mb_convert_encoding($data, $to, $from);
        if (is_string($converted)) {
            return $converted;
        }
    }

    // No converter succeeded: keep the original bytes rather than corrupt the data.
    return $data;
}

/**
Expand Down
4 changes: 2 additions & 2 deletions tests/EDITest/InterpreterTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ public function testDESADV()
'JSON does not match expected output'
);

static::assertSame(3598, \strlen($interpreter->getJson()));
static::assertSame(9383, \strlen($interpreter->getJson(true)));
static::assertSame(3594, \strlen($interpreter->getJson()));
static::assertSame(9379, \strlen($interpreter->getJson(true)));

static::assertCount(2, $interpreter->getMessages());
static::assertCount(0, $interpreter->getErrors());
Expand Down
9 changes: 9 additions & 0 deletions tests/EDITest/ParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -291,4 +291,13 @@ public function testReleaseCharacter()

static::assertSame($loaded[15][2], 'FIELD 1.1?:FIELD 1.2');
}

/**
 * A UTF-8 source declared via setSourceEncoding() must come back from
 * get('UTF-8') with its non-ASCII characters intact.
 */
public function testUtf8EncodedSourceAndOutput()
{
    $parser = new Parser();
    $parser->load(__DIR__ . '/../files/example_utf8.edi');
    $parser->setSourceEncoding('UTF-8');

    $segments = $parser->get('UTF-8');

    // Expected value goes first so a failure reports expected/actual the right way round.
    static::assertSame('MUNCIË THE MIDDLE', $segments[11][3][3]);
}
}
2 changes: 1 addition & 1 deletion tests/files/D96ADESADV.json
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@
"messageTrailer": {
"segmentIdx": 20,
"segmentCode": "UNT",
"segmentGroup": "SG16",
"segmentGroup": "",
"numberOfSegmentsInTheMessage": "21",
"messageReferenceNumber": "142"
}
Expand Down
23 changes: 23 additions & 0 deletions tests/files/example_utf8.edi
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
UNB+UNOC:1+1556150:31B+8888888:ZZ+160727:0953+1'
UNH+142+DESADV:0:96A:UN'
BGM+351+Y02197250+700101'
DTM+11:20160726:102'
RFF+ON:6877871'
RFF+PK:VEE0214439'
NAD+SU+1694901+31B'
NAD+BY+01131116+31B'
CPS+IE2156580387'
LIN+001'
PIA+5+9780738507330'
IMD+F+81+:::MUNCIË THE MIDDLE'
QTY+12:1'
RFF+LI:6877871'
RFF+ON:6877871'
LIN+002'
PIA+5+9781568361871'
IMD+F+81+:::ROADS TO SATA A 2'
QTY+12:1'
RFF+LI:6905456'
RFF+ON:6905456'
UNT+21+142'
UNZ+1+1'

0 comments on commit 9f2c96e

Please sign in to comment.