Skip to content
This repository has been archived by the owner on Jun 18, 2023. It is now read-only.

Commit

Permalink
Change config & CSV format #29, #30
Browse files: browse the repository at this point in the history
- Add min_values/max_values in place of valid_range
- CSV file has header
- Use Pandas to parse CSV (so add as requirement)
- Update examples
- Bump version
- Check for implementation of YATSM algorithm
- Put YATSM algo class in config
  • Loading branch information
ceholden committed Aug 25, 2015
1 parent 498e8ed commit f38306e
Show file tree
Hide file tree
Showing 10 changed files with 66 additions and 67 deletions.
5 changes: 3 additions & 2 deletions examples/p022r049_example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ dataset:
mask_values: [2, 3, 4, 255]
# Valid range of non-mask band data
# specify one min/max value for all bands, or a list with one value per band
valid_range: [0, 10000]
min_values: 0
max_values: 10000
# Indices for multi-temporal cloud masking (indexed on 1)
green_band: 2
swir1_band: 5
Expand Down Expand Up @@ -62,7 +63,7 @@ LassoCV:

# Section for phenology fitting
phenology:
calc_pheno: False
enable: False
# Specification for dataset indices required for EVI based phenology monitoring
red_index: 2
nir_index: 3
Expand Down
1 change: 1 addition & 0 deletions examples/p022r049_input.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
date,sensor,filename
1993009,LT5,/home/ceholden/Documents/landsat_stack/p022r049/images/LT50220491993009XXX04/LT50220491993009XXX04_stack
1993025,LT5,/home/ceholden/Documents/landsat_stack/p022r049/images/LT50220491993025AAA04/LT50220491993025AAA04_stack
1993041,LT5,/home/ceholden/Documents/landsat_stack/p022r049/images/LT50220491993041AAA04/LT50220491993041AAA04_stack
Expand Down
5 changes: 3 additions & 2 deletions examples/p035r032_example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ dataset:
mask_values: [2, 3, 4, 255]
# Valid range of non-mask band data
# specify one min/max value for all bands, or a list with one value per band
valid_range: [0, 10000]
min_values: 0
max_values: 10000
# Indices for multi-temporal cloud masking (indexed on 1)
green_band: 2
swir1_band: 5
Expand Down Expand Up @@ -67,7 +68,7 @@ OLS:

# Section for phenology fitting
phenology:
calc_pheno: False
enable: False
# Specification for dataset indices required for EVI based phenology monitoring
red_index: 2
nir_index: 3
Expand Down
1 change: 1 addition & 0 deletions examples/p035r032_input.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
date,sensor,filename
1984108,LT5,/home/ceholden/Documents/landsat_stack/p035r032/images/LT50350321984108XXX01/LT50350321984108XXX01_stack.gtif
1984140,LT5,/home/ceholden/Documents/landsat_stack/p035r032/images/LT50350321984140PAC00/LT50350321984140PAC00_stack.gtif
1984156,LT5,/home/ceholden/Documents/landsat_stack/p035r032/images/LT50350321984156PAC00/LT50350321984156PAC00_stack.gtif
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ matplotlib>=1.4.2
click>=4.0
click_plugins>=1.0
palettable>=2.0.0
pandas>=0.16.0
patsy>=0.3.0
pyyaml>=3.11
4 changes: 3 additions & 1 deletion scripts/gen_date_file.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ EOF
}

function main() {
# Header
echo "date,sensor,filename" > $output

images=$(find $root -follow -name "$pattern")
nimages=$(echo $images | awk '{ print NF }')
Expand All @@ -56,7 +58,7 @@ function main() {
sensor=${id:$sstart:3}

echo "$ydoy,$sensor,$name"
done | sort > $output
done | sort >> $output

}

Expand Down
3 changes: 3 additions & 0 deletions yatsm/algorithms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@
- ccdc.CCDCesque
"""
from .ccdc import CCDCesque

available = ['CCDCesque']
38 changes: 29 additions & 9 deletions yatsm/config_parser.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import inspect
import StringIO
import yaml

import numpy as np
import sklearn.linear_model
import sklearn.externals.joblib
import yaml

from log_yatsm import logger
from version import __version__
from . import algorithms
from .log_yatsm import logger
from .version import __version__


def parse_config_file(config_file):
Expand All @@ -29,24 +31,42 @@ def parse_config_file(config_file):
cfg = yaml.safe_load(f)

# Ensure algorithm & prediction sections are specified
if 'YATSM' not in cfg.keys():
if 'YATSM' not in cfg:
raise KeyError('YATSM must be a section in configuration YAML file')

if 'algorithm' not in cfg['YATSM']:
raise KeyError('YATSM section does not declare an algorithm')
if cfg['YATSM']['algorithm'] not in cfg.keys():
algo = cfg['YATSM']['algorithm']
if algo not in cfg:
raise KeyError('Algorithm specified (%s) is not parameterized in '
'configuration file' % cfg['YATSM']['algorithm'])
'configuration file' % algo)

if 'prediction' not in cfg['YATSM'].keys():
if 'prediction' not in cfg['YATSM']:
raise KeyError('YATSM section does not declare a prediction method')
if cfg['YATSM']['prediction'] not in cfg.keys():
if cfg['YATSM']['prediction'] not in cfg:
raise KeyError('Prediction method specified (%s) is not parameterized '
'in configuration file' % cfg['YATSM']['prediction'])

# Embed algorithm in YATSM key
if algo not in algorithms.available:
raise NotImplementedError('Algorithm specified (%s) is not currently '
'available' % algo)
cfg['YATSM']['algorithm_cls'] = getattr(algorithms, algo)
if not cfg['YATSM']['algorithm_cls']:
raise KeyError('Could not find algorithm specified (%s) in '
'`yatsm.algorithms`' % algo)

# Expand min/max values to all bands
n_bands = cfg['dataset']['n_bands']
mins, maxes = cfg['dataset']['min_values'], cfg['dataset']['max_values']
if isinstance(mins, (float, int)):
cfg['dataset']['min_values'] = np.asarray([mins] * n_bands)
if isinstance(maxes, (float, int)):
cfg['dataset']['max_values'] = np.asarray([maxes] * n_bands)

# Add in dummy phenology and classification dicts if not included
if 'phenology' not in cfg:
cfg['phenology'] = {'calc_pheno': False}
cfg['phenology'] = {'enable': False}

if 'classification' not in cfg:
cfg['classification'] = {'training_image': None}
Expand Down
73 changes: 21 additions & 52 deletions yatsm/utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from __future__ import division

import csv
from datetime import datetime as dt
import fnmatch
import os
import sys

import numpy as np
import pandas as pd

from log_yatsm import logger

Expand Down Expand Up @@ -46,8 +46,10 @@ def distribute_jobs(job_number, total_jobs, n, interlaced=True):
tasks = np.arange(i_start, min(i_end, n))

if tasks.size == 0:
raise ValueError('No jobs assigned for job_number/total_jobs: {j}/{t}'.
format(j=job_number, t=total_jobs))
raise ValueError(
'No jobs assigned for job_number/total_jobs: {j}/{t}'.format(
j=job_number,
t=total_jobs))

return tasks

Expand All @@ -64,67 +66,36 @@ def get_output_name(dataset_config, line):
"""
return os.path.join(dataset_config['output'],
'{pref}{line}.npz'.format(
pref=dataset_config['output_prefix'],
line=line))
'%s%s.npz' % (dataset_config['output_prefix'], line))


# IMAGE DATASET READING
def csvfile_to_dataset(input_file, date_format='%Y-%j'):
def csvfile_to_dataset(input_file, date_format='%Y%j'):
""" Return sorted filenames of images from input text file
Args:
input_file (str): text file of dates and files
date_format (str): format of dates in file
Returns:
dict: dates, sensor IDs, and filenames of stacked images as np.ndarray
within a dict
pd.DataFrame: dates, sensor IDs, and filenames of images
"""
# Store index of date and image
i_date = 0
i_sensor = 1
i_image = 2
df = pd.read_csv(input_file)

dates = []
images = []
sensors = []
# Guess and convert date field
date_col = [i for i, n in enumerate(df.columns) if 'date' in n.lower()]
if not date_col:
raise KeyError('Could not find date column in input file')
if len(date_col) > 1:
logger.warning('Multiple date columns found in input CSV file. '
'Using %s' % df.columns[date_col[0]])
date_col = df.columns[date_col[0]]

logger.debug('Opening image dataset file')
with open(input_file, 'rb') as f:
reader = csv.reader(f)
df[date_col] = pd.to_datetime(
df[date_col], format=date_format).map(lambda x: dt.toordinal(x))

# Figure out which index is for what
row = reader.next()

try:
dt.strptime(row[i_date], date_format).toordinal()
except:
logger.debug('Could not parse first column to ordinal date')
try:
dt.strptime(row[i_sensor], date_format).toordinal()
except:
logger.debug('Could not parse second column to ordinal date')
logger.error('Could not parse any columns to ordinal date')
logger.error('Input dataset file: {f}'.format(f=input_file))
logger.error('Date format: {f}'.format(f=date_format))
raise
else:
i_date = 1
i_sensor = 0

f.seek(0)

logger.debug('Reading in image date, sensor, and filenames')
for row in reader:
dates.append(dt.strptime(row[i_date], date_format).toordinal())
sensors.append(row[i_sensor])
images.append(row[i_image])

return {'dates': np.array(dates),
'sensors': np.array(sensors),
'images': np.array(images)}
return df


def get_image_IDs(filenames):
Expand Down Expand Up @@ -183,9 +154,7 @@ def write_output(raster, output, image_ds, gdal_frmt, ndv, band_names=None):

if band_names is not None:
ds.GetRasterBand(1).SetDescription(band_names[0])
ds.GetRasterBand(1).SetMetadata({
'band_1': band_names[0]
})
ds.GetRasterBand(1).SetMetadata({'band_1': band_names[0]})

ds.SetProjection(image_ds.GetProjection())
ds.SetGeoTransform(image_ds.GetGeoTransform())
Expand Down
2 changes: 1 addition & 1 deletion yatsm/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.4.0'
__version__ = '0.5.0b'

0 comments on commit f38306e

Please sign in to comment.