Skip to content

Commit

Permalink
#9: streaming reading URL CSV and JSON Array data.
Browse files (browse the repository at this point in the history)
  • Loading branch information
Georges Labrèche committed Nov 28, 2017
1 parent 27681b2 commit 961e640
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 12 deletions.
47 changes: 38 additions & 9 deletions src/main/java/io/frictionlessdata/datapackage/Resource.java
Original file line number Diff line number Diff line change
@@ -1,14 +1,23 @@
package io.frictionlessdata.datapackage;

import io.frictionlessdata.datapackage.exceptions.DataPackageException;
import java.io.ByteArrayInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.validator.routines.UrlValidator;
import org.json.CDL;
import org.json.JSONArray;

/**
Expand Down Expand Up @@ -60,20 +69,40 @@ public Iterator<CSVRecord> iter() throws IOException, FileNotFoundException, Dat
}

if(this.path != null){
Reader in = new FileReader(this.path);
return CSVFormat.RFC4180.parse(in).iterator();

}else if (this.data != null){
if(this.data instanceof String){
CSVParser parser = CSVParser.parse((String)this.data, CSVFormat.RFC4180);
// First check if it's a URL String:
String[] schemes = {"http", "https"};
UrlValidator urlValidator = new UrlValidator(schemes);

if (urlValidator.isValid(this.path)) {
CSVParser parser = CSVParser.parse(new URL(this.path), Charset.forName("UTF-8"), CSVFormat.RFC4180);
return parser.getRecords().iterator();

}else if(this.data instanceof JSONArray){
//TODO: Implement:
JSONArray dataArr = (JSONArray)this.data;
return null;
}else{
// If it's not a URL String, then it's a CSV String.
Reader fr = new FileReader(this.path);
return CSVFormat.RFC4180.parse(fr).iterator();
}

}else if (this.data != null){

// Data is in String, hence in CSV Format.
if(this.data instanceof String && this.format.equalsIgnoreCase(FORMAT_CSV)){

Reader sr = new StringReader((String)this.data);
return CSVFormat.RFC4180.parse(sr).iterator();

}
// Data is not String, hence in JSON Array format.
else if(this.data instanceof JSONArray && this.format.equalsIgnoreCase(FORMAT_JSON)){
JSONArray dataJsonArray = (JSONArray)this.data;
String dataCsv = CDL.toString(dataJsonArray);

Reader sr = new StringReader(dataCsv);
return CSVFormat.RFC4180.parse(sr).iterator();

}else{
// Data is in unexpected format. Throw exception.
throw new DataPackageException("A resource has an invalid data format. It should be a CSV String or a JSON Array.");
}

Expand Down
82 changes: 79 additions & 3 deletions src/test/java/io/frictionlessdata/datapackage/ResourceTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.util.Iterator;
import java.util.List;
import org.apache.commons.csv.CSVRecord;
import org.json.JSONArray;
import org.junit.Assert;
import org.junit.Test;

Expand All @@ -17,7 +18,39 @@
public class ResourceTest {

/**
 * Iterates a "population" resource whose path is an HTTPS URL pointing at
 * population.csv and compares the first three fields of every record with
 * the row at the same index of {@code getExpectedPopulationData()}.
 *
 * NOTE(review): the path is a remote raw.githubusercontent.com URL, so this
 * test presumably needs network access to run - confirm.
 *
 * @throws IOException declared by the test; presumably raised by {@code resource.iter()} on read failure.
 * @throws DataPackageException declared by the test; presumably raised by {@code resource.iter()}.
 */
@Test
// NOTE(review): the next line looks like the pre-change signature left over from
// the diff rendering; it appears to be superseded by the signature after it - confirm.
public void testIterDataFromPath() throws IOException, DataPackageException, MalformedURLException{
public void testIterateDataFromUrlPath() throws IOException, DataPackageException{

// The resource path is a URL string rather than a local file path.
String urlString = "https://raw.githubusercontent.com/frictionlessdata/datapackage-java/master/src/test/resources/fixtures/data/population.csv";
Resource resource = new Resource("population", urlString);

// Set the profile to tabular data resource.
resource.setProfile(Profile.PROFILE_TABULAR_DATA_RESOURCE);

// Expected data.
List<String[]> expectedData = this.getExpectedPopulationData();

// Get iterator.
Iterator<CSVRecord> iter = resource.iter();
// Index of the expected row that corresponds to the record being read.
int expectedDataIndex = 0;

// Assert data.
// NOTE(review): nothing after this loop checks that expectedDataIndex reached
// expectedData.size(), so an empty or truncated iterator would still pass.
while(iter.hasNext()){
CSVRecord record = iter.next();
// Fields are read by position: 0 = city, 1 = year, 2 = population.
String city = record.get(0);
String year = record.get(1);
String population = record.get(2);

Assert.assertEquals(expectedData.get(expectedDataIndex)[0], city);
Assert.assertEquals(expectedData.get(expectedDataIndex)[1], year);
Assert.assertEquals(expectedData.get(expectedDataIndex)[2], population);

expectedDataIndex++;
}
}


@Test
public void testIterateDataFromFilePath() throws IOException, DataPackageException, MalformedURLException{

String filePath = ResourceTest.class.getResource("/fixtures/data/population.csv").getPath();
Resource resource = new Resource("population", filePath);
Expand Down Expand Up @@ -48,7 +81,7 @@ public void testIterDataFromPath() throws IOException, DataPackageException, Mal
}

@Test
public void testIterDataFromDataInCsvForm() throws IOException, DataPackageException{
public void testIterateDataFromCsvFormat() throws IOException, DataPackageException{
String filePath = ResourceTest.class.getResource("/fixtures/data/population.csv").getPath();

String dataString = "city,year,population\nlondon,2017,8780000\nparis,2017,2240000\nrome,2017,2860000";
Expand Down Expand Up @@ -80,8 +113,51 @@ public void testIterDataFromDataInCsvForm() throws IOException, DataPackageExcep
}

/**
 * Iterates a "population" resource built from an in-memory JSON array of
 * three city objects (format {@code Resource.FORMAT_JSON}) and compares the
 * first three fields of every record with the row at the same index of
 * {@code getExpectedPopulationData()}.
 *
 * @throws IOException declared by the test; presumably raised by {@code resource.iter()}.
 * @throws DataPackageException declared by the test; presumably raised by {@code resource.iter()}.
 */
@Test
// NOTE(review): the next line looks like the pre-change signature left over from
// the diff rendering; it appears to be superseded by the signature after it - confirm.
public void testIterDataFromDataInJSONForm(){
public void testIterateDataFromJSONFormat() throws IOException, DataPackageException{
// NOTE(review): filePath is not referenced again in the visible body of this method.
String filePath = ResourceTest.class.getResource("/fixtures/data/population.csv").getPath();
// Three objects with keys city (string), year (number) and population (number).
JSONArray jsonData = new JSONArray("[" +
"{" +
"\"city\": \"london\"," +
"\"year\": 2017," +
"\"population\": 8780000" +
"}," +
"{" +
"\"city\": \"paris\"," +
"\"year\": 2017," +
"\"population\": 2240000" +
"}," +
"{" +
"\"city\": \"rome\"," +
"\"year\": 2017," +
"\"population\": 2860000" +
"}" +
"]");

// The data is passed inline as a JSONArray together with the JSON format constant.
Resource resource = new Resource("population", jsonData, Resource.FORMAT_JSON);

// Set the profile to tabular data resource.
resource.setProfile(Profile.PROFILE_TABULAR_DATA_RESOURCE);

// Expected data.
List<String[]> expectedData = this.getExpectedPopulationData();

// Get Iterator.
Iterator<CSVRecord> iter = resource.iter();
// Index of the expected row that corresponds to the record being read.
int expectedDataIndex = 0;

// Assert data.
// NOTE(review): nothing after this loop checks that expectedDataIndex reached
// expectedData.size(), so an empty or truncated iterator would still pass.
while(iter.hasNext()){
CSVRecord record = iter.next();
// NOTE(review): positional access assumes the records come back in the column
// order city, year, population - confirm the JSON-to-record conversion in
// Resource.iter() guarantees that ordering for JSON objects.
String city = record.get(0);
String year = record.get(1);
String population = record.get(2);

Assert.assertEquals(expectedData.get(expectedDataIndex)[0], city);
Assert.assertEquals(expectedData.get(expectedDataIndex)[1], year);
Assert.assertEquals(expectedData.get(expectedDataIndex)[2], population);

expectedDataIndex++;
}
}

private List<String[]> getExpectedPopulationData(){
Expand Down

0 comments on commit 961e640

Please sign in to comment.