From e6cdcb8ea86476ff50d23ab41fe170a34eb64493 Mon Sep 17 00:00:00 2001 From: Menno Dekker Date: Wed, 12 Dec 2018 12:13:37 +0100 Subject: [PATCH 1/4] Partial fix #15 - labels no longer trimmed for multibyte chars --- src/Sav/Record/Variable.php | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/Sav/Record/Variable.php b/src/Sav/Record/Variable.php index 3d7e151..125de0c 100644 --- a/src/Sav/Record/Variable.php +++ b/src/Sav/Record/Variable.php @@ -140,9 +140,17 @@ public function write(Buffer $buffer) $buffer->writeString($this->name, 8); if ($hasLabel) { + // Maxlength is 255 bytes, since we write utf8 a char can be multiple bytes $labelLength = min(mb_strlen($this->label), 255); - $buffer->writeInt($labelLength); - $buffer->writeString($this->label, Utils::roundUp($labelLength, 4)); + $label = mb_substr($this->label, 0, $labelLength); + $labelLengthBytes = mb_strlen($label, '8bit'); + while ($labelLengthBytes > 255) { + // Strip one char, can be multiple bytes + $label = mb_substr($this->label, 0, -1); + $labelLengthBytes = mb_strlen($label, '8bit'); + } + $buffer->writeInt($labelLengthBytes); + $buffer->writeString($label, Utils::roundUp($labelLengthBytes, 4)); } // TODO: test @@ -156,7 +164,8 @@ public function write(Buffer $buffer) } } - $this->writeBlank($buffer, $seg0width); + // I think we don't need an empty record + //$this->writeBlank($buffer, $seg0width); // Write additional segments for very long string variables. if (self::isVeryLong($this->width)) { From 0ebd68bbee028595efeecca65fa08c0af03b868c Mon Sep 17 00:00:00 2001 From: Menno Dekker Date: Wed, 12 Dec 2018 12:28:49 +0100 Subject: [PATCH 2/4] Created a unittest for #15 --- tests/WriteMultibyteTest.php | 58 ++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 tests/WriteMultibyteTest.php diff --git a/tests/WriteMultibyteTest.php b/tests/WriteMultibyteTest.php new file mode 100644 index 0000000..7d0995d --- /dev/null +++ b/tests/WriteMultibyteTest.php @@ -0,0 +1,58 @@ +filename = __DIR__ . '/mbtest.sav'; + } + + public function testMultiByteLabel() + { + $data = [ + 'header' => [ + 'prodName' => '@(#) IBM SPSS STATISTICS', + 'layoutCode' => 2, + 'creationDate' => date('d M y'), + 'creationTime' => date('H:i:s'), + ], + 'variables' => [ + [ + 'name' => 'longname_longerthanexpected', + 'label' => 'Data zákończenia', + 'width' => 16, + 'format' => 1, + ], + [ + 'name' => 'ccc', + 'label' => '12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901233á', + 'format' => 5, + 'values' => [ + 1 => 'Panel', + ], + ], + ], + ]; + $writer = new Writer($data); + + $writer->save($file); + $reader = Reader::fromFile($file)->read(); + + // Sort name + $this->assertEquals($data['variables'][0]['label'], $reader->variables[0]->label); + + // Long name + $this->assertEquals(mb_substr($data['variables'][1]['label'],0,-1), $reader->variables[1]->label); + } + + public function tearDown() + { + unlink($this->filename); + } +} From 5f8c86cb7fd1976ffbadcea0918fd0df46f445d2 Mon Sep 17 00:00:00 2001 From: Menno Dekker Date: Wed, 12 Dec 2018 13:03:38 +0100 Subject: [PATCH 3/4] Fixed unittests --- tests/SavRandomReadWriteTest.php | 3 ++- tests/WriteMultibyteTest.php | 29 +++++++++++------------------ tests/data12.sav | Bin 0 -> 642 bytes tests/mbtest.sav | Bin 0 -> 802 bytes tests/test.sav | Bin 0 -> 706 bytes 5 files changed, 13 insertions(+), 19 deletions(-) create mode 100644 tests/data12.sav create mode 100644 tests/mbtest.sav create mode 100644 tests/test.sav diff --git a/tests/SavRandomReadWriteTest.php b/tests/SavRandomReadWriteTest.php index 05e98ab..c4f8040 100644 --- a/tests/SavRandomReadWriteTest.php +++ b/tests/SavRandomReadWriteTest.php @@ -126,7 +126,8 @@ public function testWriteRead($data) // ) // ); // } - $index += $var['width'] > 0 ? Utils::widthToOcts($var['width']) : 1; + //$index += $var['width'] > 0 ? Utils::widthToOcts($var['width']) : 1; + $index++; } // TODO: valueLabels diff --git a/tests/WriteMultibyteTest.php b/tests/WriteMultibyteTest.php index 7d0995d..1e46091 100644 --- a/tests/WriteMultibyteTest.php +++ b/tests/WriteMultibyteTest.php @@ -6,16 +6,11 @@ use SPSS\Sav\Writer; class WriteMultibyteTest extends TestCase -{ - public function setUp() - { - parent::setUp(); - $this->filename = __DIR__ . '/mbtest.sav'; - } - +{ + public function testMultiByteLabel() { - $data = [ + $data = [ 'header' => [ 'prodName' => '@(#) IBM SPSS STATISTICS', 'layoutCode' => 2, @@ -41,18 +36,16 @@ public function testMultiByteLabel() ]; $writer = new Writer($data); - $writer->save($file); - $reader = Reader::fromFile($file)->read(); - + $buffer = $writer->getBuffer(); + $buffer->rewind(); + + $reader = Reader::fromString($buffer->getStream())->read(); + // Sort name $this->assertEquals($data['variables'][0]['label'], $reader->variables[0]->label); - + // Long name - $this->assertEquals(mb_substr($data['variables'][1]['label'],0,-1), $reader->variables[1]->label); - } - - public function tearDown() - { - unlink($this->filename); + $this->assertEquals(mb_substr($data['variables'][1]['label'], 0, -1), $reader->variables[1]->label); } + } diff --git a/tests/data12.sav b/tests/data12.sav new file mode 100644 index 0000000000000000000000000000000000000000..c5d60e847461ab97ee0e9012b4b296a557faefc1 GIT binary patch literal 642 zcmY#!^D%PJP}WrNbn;aQ4hRlb2o7-!@eB^}bPiS^0x&T!Ffao#BM_s4NC!hB1((!h z1w#u%BP%0QD`N{H%)_e$XaUGR0U(CkCjevtahL%Z7zzR9ToOwX6{-#|%+5dBl3bOV zmzk)b0MZ8naD5!C3=BXF)n^n3lruCkHZe6bx3Dyz1xrBzHJm=!vj!yQrRFFout41f zl4l2E5DgOJ0Ajd%fou=}@j)2mpO=3j>S1a@`ZoLrf%pHRbUlm@GM5{OL2B867-R;> zd=NlpgVgat#f*S7EL?4K^7GR35_41IL5$R*k_@2RD+*GROHxxfVS#9yoSe)FG657r VAk$%?&cN`&9>@iQ=OAYy0RY#mW~TrE literal 0 HcmV?d00001 diff --git a/tests/mbtest.sav b/tests/mbtest.sav new file mode 100644 index 0000000000000000000000000000000000000000..b5c4002f8dfb73c04223ff66116dcff487b4aa76 GIT binary patch literal 802 zcmY#!^D%PJP}WrNbn;aQ4hRlb2o7-!@eB^}bPiS^0x&T!Ffao#BM_s4NC!hB1((!h z1w#u%BP(M=D-%m1%)_e$XaUGR0U(CkCjevtahL%Z7zzR9ToOwX6{-#|%+5dBl3bOV zmzk)b0MZ8naD5!C3=BXF)o1h%C}(J7Y+`C=ZeeLK0$Iii3aIh;!JaiBF)uYoL4gGt zZXkJfAO_JOF%BSxhbNE?0w6vJgTnjeUx<2`T9CdC|3TpWe<)oK z0Wu#1kl7%0yihSCAPtLm+noHo^t{B})OZjhwWuTm==O?&)Z~)X6i!$q+a@O`bAn6& Xr67>$uvEdo@WCF)1%u}xXCeUrEklSE literal 0 HcmV?d00001 diff --git a/tests/test.sav b/tests/test.sav new file mode 100644 index 0000000000000000000000000000000000000000..e234bfab53d23de102f14c01f3fedf6aa55dc7b2 GIT binary patch literal 706 zcmbtQ!AiqG5M7(9ts;2v=wU$*D%hCRgBC*5Pzyo@F;ekVHf?H)mezK?^y0_(BmRY- zAfDPE&^T|C!Gqp>@OEY=Z{}@gx6yR{y`6m-UWTEp*RE@_ao#+WhqABrcv`XTyMgXM zMWxPYXx|KGYSO(>9s4R)-B2qnb3zD<$oR=`eaDscxFekt$E_S4SKL$iPml@xfTc8u zTz=WG<+N*x-nV+A5quuKwYQh? zfp0}>!oKhVxo3zEg#1uF#1wnxCgn;mHWj=Tddy$V`cNwnw}^~{z_X$K_)PVzg&#CY zGEYM?fBwqun7dAdwn&6_lgRVn1)z=`b@Xgxd5I!+>nWv*Se_RV+Bl-faf{fK7ZBPw og1{-G8xEL;*J2_72jHC(b6Zrig-UZ3iz>%#v47NOW{<4<1`?5RKL7v# literal 0 HcmV?d00001 From 3fec102c5ba5fd03596f77dc9cc2d27055ad9dde Mon Sep 17 00:00:00 2001 From: Menno Dekker Date: Wed, 12 Dec 2018 13:17:05 +0100 Subject: [PATCH 4/4] Cleanup test files --- tests/data12.sav | Bin 642 -> 0 bytes tests/mbtest.sav | Bin 802 -> 0 bytes tests/test.sav | Bin 706 -> 0 bytes 3 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tests/data12.sav delete mode 100644 tests/mbtest.sav delete mode 100644 tests/test.sav diff --git a/tests/data12.sav b/tests/data12.sav deleted file mode 100644 index c5d60e847461ab97ee0e9012b4b296a557faefc1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 642 zcmY#!^D%PJP}WrNbn;aQ4hRlb2o7-!@eB^}bPiS^0x&T!Ffao#BM_s4NC!hB1((!h z1w#u%BP%0QD`N{H%)_e$XaUGR0U(CkCjevtahL%Z7zzR9ToOwX6{-#|%+5dBl3bOV zmzk)b0MZ8naD5!C3=BXF)n^n3lruCkHZe6bx3Dyz1xrBzHJm=!vj!yQrRFFout41f zl4l2E5DgOJ0Ajd%fou=}@j)2mpO=3j>S1a@`ZoLrf%pHRbUlm@GM5{OL2B867-R;> zd=NlpgVgat#f*S7EL?4K^7GR35_41IL5$R*k_@2RD+*GROHxxfVS#9yoSe)FG657r VAk$%?&cN`&9>@iQ=OAYy0RY#mW~TrE diff --git a/tests/mbtest.sav b/tests/mbtest.sav deleted file mode 100644 index b5c4002f8dfb73c04223ff66116dcff487b4aa76..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 802 zcmY#!^D%PJP}WrNbn;aQ4hRlb2o7-!@eB^}bPiS^0x&T!Ffao#BM_s4NC!hB1((!h z1w#u%BP(M=D-%m1%)_e$XaUGR0U(CkCjevtahL%Z7zzR9ToOwX6{-#|%+5dBl3bOV zmzk)b0MZ8naD5!C3=BXF)o1h%C}(J7Y+`C=ZeeLK0$Iii3aIh;!JaiBF)uYoL4gGt zZXkJfAO_JOF%BSxhbNE?0w6vJgTnjeUx<2`T9CdC|3TpWe<)oK z0Wu#1kl7%0yihSCAPtLm+noHo^t{B})OZjhwWuTm==O?&)Z~)X6i!$q+a@O`bAn6& Xr67>$uvEdo@WCF)1%u}xXCeUrEklSE diff --git a/tests/test.sav b/tests/test.sav deleted file mode 100644 index e234bfab53d23de102f14c01f3fedf6aa55dc7b2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 706 zcmbtQ!AiqG5M7(9ts;2v=wU$*D%hCRgBC*5Pzyo@F;ekVHf?H)mezK?^y0_(BmRY- zAfDPE&^T|C!Gqp>@OEY=Z{}@gx6yR{y`6m-UWTEp*RE@_ao#+WhqABrcv`XTyMgXM zMWxPYXx|KGYSO(>9s4R)-B2qnb3zD<$oR=`eaDscxFekt$E_S4SKL$iPml@xfTc8u zTz=WG<+N*x-nV+A5quuKwYQh? zfp0}>!oKhVxo3zEg#1uF#1wnxCgn;mHWj=Tddy$V`cNwnw}^~{z_X$K_)PVzg&#CY zGEYM?fBwqun7dAdwn&6_lgRVn1)z=`b@Xgxd5I!+>nWv*Se_RV+Bl-faf{fK7ZBPw og1{-G8xEL;*J2_72jHC(b6Zrig-UZ3iz>%#v47NOW{<4<1`?5RKL7v#