Skip to content
This repository has been archived by the owner on Sep 1, 2023. It is now read-only.

Commit

Permalink
Merge pull request #3669 from ywcui1990/fixSDRclassifierBug
Browse files Browse the repository at this point in the history
Fix SDR classifier Region
  • Loading branch information
scottpurdy authored Jun 7, 2017
2 parents 010c50e + 66b4941 commit c42c8bb
Show file tree
Hide file tree
Showing 4 changed files with 254 additions and 97 deletions.
109 changes: 66 additions & 43 deletions src/nupic/algorithms/sdr_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ def compute(self, recordNum, patternNZ, classification, learn, infer):
:param classification: Dict of the classification information where:
- bucketIdx: index of the encoder bucket
- actValue: actual value going into the encoder
- bucketIdx: list of indices of the encoder bucket
- actValue: list of actual values going into the encoder
Classification could be None for inference mode.
:param learn: (bool) if true, learn this sample
Expand Down Expand Up @@ -204,8 +204,15 @@ def compute(self, recordNum, patternNZ, classification, learn, infer):
print " patternNZ (%d):" % len(patternNZ), patternNZ
print " classificationIn:", classification

# Store pattern in our history
self._patternNZHistory.append((recordNum, patternNZ))
# ensures that recordNum increases monotonically
if len(self._patternNZHistory) > 0:
if recordNum < self._patternNZHistory[-1][0]:
raise ValueError("the record number has to increase monotonically")

# Store pattern in our history if this is a new record
if len(self._patternNZHistory) == 0 or \
recordNum > self._patternNZHistory[-1][0]:
self._patternNZHistory.append((recordNum, patternNZ))

# To allow multi-class classification, we need to be able to run learning
# without inference being on. So initialize retval outside
Expand All @@ -222,48 +229,63 @@ def compute(self, recordNum, patternNZ, classification, learn, infer):
self._maxBucketIdx+1))), axis=0)
self._maxInputIdx = int(newMaxInputIdx)

# Get classification info
if classification is not None:
if type(classification["bucketIdx"]) is not list:
bucketIdxList = [classification["bucketIdx"]]
actValueList = [classification["actValue"]]
numCategory = 1
else:
bucketIdxList = classification["bucketIdx"]
actValueList = classification["actValue"]
numCategory = len(classification["bucketIdx"])
else:
if learn:
raise ValueError("classification cannot be None when learn=True")
actValueList = None
bucketIdxList = None
# ------------------------------------------------------------------------
# Inference:
# For each active bit in the activationPattern, get the classification
# votes
if infer:
retval = self.infer(patternNZ, classification)
retval = self.infer(patternNZ, actValueList)


if learn and classification["bucketIdx"] is not None:
# Get classification info
bucketIdx = classification["bucketIdx"]
actValue = classification["actValue"]

# Update maxBucketIndex and augment weight matrix with zero padding
if bucketIdx > self._maxBucketIdx:
for nSteps in self.steps:
self._weightMatrix[nSteps] = numpy.concatenate((
self._weightMatrix[nSteps],
numpy.zeros(shape=(self._maxInputIdx+1,
bucketIdx-self._maxBucketIdx))), axis=1)

self._maxBucketIdx = int(bucketIdx)

# Update rolling average of actual values if it's a scalar. If it's
# not, it must be a category, in which case each bucket only ever
# sees one category so we don't need a running average.
while self._maxBucketIdx > len(self._actualValues) - 1:
self._actualValues.append(None)
if self._actualValues[bucketIdx] is None:
self._actualValues[bucketIdx] = actValue
else:
if (isinstance(actValue, int) or
isinstance(actValue, float) or
isinstance(actValue, long)):
self._actualValues[bucketIdx] = ((1.0 - self.actValueAlpha)
* self._actualValues[bucketIdx]
+ self.actValueAlpha * actValue)
else:
for categoryI in range(numCategory):
bucketIdx = bucketIdxList[categoryI]
actValue = actValueList[categoryI]

# Update maxBucketIndex and augment weight matrix with zero padding
if bucketIdx > self._maxBucketIdx:
for nSteps in self.steps:
self._weightMatrix[nSteps] = numpy.concatenate((
self._weightMatrix[nSteps],
numpy.zeros(shape=(self._maxInputIdx+1,
bucketIdx-self._maxBucketIdx))), axis=1)

self._maxBucketIdx = int(bucketIdx)

# Update rolling average of actual values if it's a scalar. If it's
# not, it must be a category, in which case each bucket only ever
# sees one category so we don't need a running average.
while self._maxBucketIdx > len(self._actualValues) - 1:
self._actualValues.append(None)
if self._actualValues[bucketIdx] is None:
self._actualValues[bucketIdx] = actValue
else:
if (isinstance(actValue, int) or
isinstance(actValue, float) or
isinstance(actValue, long)):
self._actualValues[bucketIdx] = ((1.0 - self.actValueAlpha)
* self._actualValues[bucketIdx]
+ self.actValueAlpha * actValue)
else:
self._actualValues[bucketIdx] = actValue

for (learnRecordNum, learnPatternNZ) in self._patternNZHistory:
error = self._calculateError(recordNum, classification)
error = self._calculateError(recordNum, bucketIdxList)

nSteps = recordNum - learnRecordNum
if nSteps in self.steps:
Expand All @@ -289,7 +311,7 @@ def compute(self, recordNum, patternNZ, classification, learn, infer):



def infer(self, patternNZ, classification):
def infer(self, patternNZ, actValueList):
"""
Return the inference value from one input sample. The actual
learning happens in compute().
Expand Down Expand Up @@ -319,10 +341,10 @@ def infer(self, patternNZ, classification):

# NOTE: If doing 0-step prediction, we shouldn't use any knowledge
# of the classification input during inference.
if self.steps[0] == 0 or classification is None:
if self.steps[0] == 0 or actValueList is None:
defaultValue = 0
else:
defaultValue = classification["actValue"]
defaultValue = actValueList[0]
actValues = [x if x is not None else defaultValue
for x in self._actualValues]
retval = {"actualValues": actValues}
Expand Down Expand Up @@ -436,19 +458,20 @@ def write(self, proto):
proto.verbosity = self.verbosity


def _calculateError(self, recordNum, classification):
def _calculateError(self, recordNum, bucketIdxList):
"""
Calculate error signal
:param classification: dict of the classification information:
bucketIdx: index of the encoder bucket
actValue: actual value going into the encoder
:param bucketIdxList: list of encoder buckets
:return: dict containing error. The key is the number of steps
The value is a numpy array of error at the output layer
"""
error = dict()
targetDist = numpy.zeros(self._maxBucketIdx + 1)
targetDist[classification["bucketIdx"]] = 1.0
numCategories = len(bucketIdxList)
for bucketIdx in bucketIdxList:
targetDist[bucketIdx] = 1.0/numCategories

for (learnRecordNum, learnPatternNZ) in self._patternNZHistory:
nSteps = recordNum - learnRecordNum
Expand Down
75 changes: 32 additions & 43 deletions src/nupic/regions/sdr_classifier_region.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,59 +364,48 @@ def compute(self, inputs, outputs):
# when network.run() is called
self._computeFlag = True

# An input can potentially belong to multiple categories.
# If a category value is < 0, it means that the input does not belong to
# that category.
categories = [category for category in inputs["categoryIn"]
if category >= 0]

patternNZ = inputs["bottomUpIn"].nonzero()[0]

# ==========================================================================
# Allow to train on multiple input categories.
# Do inference first, and then train on all input categories.

# --------------------------------------------------------------------------
# 1. Call classifier. Don't train. Just inference. Train after.

# Use Dummy classification input, because this param is required even for
# inference mode. Because learning is off, the classifier is not learning
# this dummy input. Inference only here.
classificationIn = {"actValue": 0, "bucketIdx": 0}
clResults = self._sdrClassifier.compute(recordNum=self.recordNum,
patternNZ=patternNZ,
classification=classificationIn,
learn=False,
infer=self.inferenceMode)

# ------------------------------------------------------------------------
# 2. Train classifier, no inference
if self.learningMode:
for category in categories:
classificationIn = {"bucketIdx": int(category),
"actValue": int(category)}

self._sdrClassifier.compute(recordNum=self.recordNum,
patternNZ=patternNZ,
classification=classificationIn,
learn=self.learningMode,
infer=False)

# If the input does not belong to a category, i.e. len(categories) == 0,
# then look for bucketIdx and actValueIn.
if len(categories) == 0:
# An input can potentially belong to multiple categories.
# If a category value is < 0, it means that the input does not belong to
# that category.
categories = [category for category in inputs["categoryIn"]
if category >= 0]

if len(categories) > 0:
# Allow to train on multiple input categories.
bucketIdxList = []
actValueList = []
for category in categories:
bucketIdxList.append(int(category))
actValueList.append(int(category))

classificationIn = {"bucketIdx": bucketIdxList,
"actValue": actValueList}
else:
# If the input does not belong to a category, i.e. len(categories) == 0,
# then look for bucketIdx and actValueIn.
if "bucketIdxIn" not in inputs:
raise KeyError("Network link missing: bucketIdxOut -> bucketIdxIn")
if "actValueIn" not in inputs:
raise KeyError("Network link missing: actValueOut -> actValueIn")

classificationIn = {"bucketIdx": int(inputs["bucketIdxIn"]),
"actValue": float(inputs["actValueIn"])}
self._sdrClassifier.compute(recordNum=self.recordNum,
patternNZ=patternNZ,
classification=classificationIn,
learn=self.learningMode,
infer=False)
else:
# Use Dummy classification input, because this param is required even for
# inference mode. Because learning is off, the classifier is not learning
# this dummy input. Inference only here.
classificationIn = {"actValue": 0, "bucketIdx": 0}

# Perform inference if self.inferenceMode is True
# Train classifier if self.learningMode is True
clResults = self._sdrClassifier.compute(recordNum=self.recordNum,
patternNZ=patternNZ,
classification=classificationIn,
learn=self.learningMode,
infer=self.inferenceMode)

# fill outputs with clResults
if clResults is not None and len(clResults) > 0:
Expand Down
100 changes: 97 additions & 3 deletions tests/integration/nupic/regions/single_step_sdr_classifier_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,18 @@
# http://numenta.org/licenses/
# ----------------------------------------------------------------------

from operator import itemgetter
import os
import tempfile
import unittest

import numpy as np

from datetime import datetime
from nupic.data.file_record_stream import FileRecordStream
from nupic.encoders import MultiEncoder, ScalarEncoder
from nupic.engine import Network

from nupic.frameworks.opf.model_factory import ModelFactory


def _getTempFileName():
Expand Down Expand Up @@ -128,7 +131,7 @@ def testSimpleMulticlassNetworkPY(self):
dataSource.close()
os.remove(filename)


@unittest.skip("Skip test until we updated SDR classifier in nupic.core")
def testSimpleMulticlassNetworkCPP(self):
# Setup data record stream of fake data (with three categories)
filename = _getTempFileName()
Expand Down Expand Up @@ -204,14 +207,105 @@ def testSimpleMulticlassNetworkCPP(self):
net.run(1)
inferredCats = classifier.getOutputData("categoriesOut")
self.assertSequenceEqual(expectedCats[i], inferredCats.tolist(),
"Classififer did not infer expected category "
"Classifier did not infer expected category "
"for record number {}.".format(i))

# Close data stream, delete file.
dataSource.close()
os.remove(filename)


def testHelloWorldPrediction(self):
  """
  Train an HTMPrediction model on repeated passes over a short character
  sequence and check, during the late passes, that the top-ranked 1-step
  prediction equals the character that is fed in next.
  """
  sentence = 'hello world.'
  alphabet = list("abcdefghijklmnopqrstuvwxyz 1234567890.")
  bitsPerSymbol = 11
  # One block of bits more than there are listed categories
  # (presumably for the encoder's "unknown" category -- TODO confirm).
  totalColumns = bitsPerSymbol * (len(alphabet) + 1)

  tokenEncoder = {
    "fieldname": u"token",
    "name": u"token",
    "type": "CategoryEncoder",
    "categoryList": alphabet,
    "w": bitsPerSymbol,
    "forced": True,
  }

  sensorParams = {
    "verbosity": 0,
    "encoders": {"token": tokenEncoder},
    "sensorAutoReset": None,
  }

  # The spatial pooler is switched off below ("spEnable": False); its
  # parameter block is still passed to the model unchanged.
  spParams = {
    "spVerbosity": 0,
    "globalInhibition": 1,
    "columnCount": 2048,
    "inputWidth": 0,
    "numActiveColumnsPerInhArea": 40,
    "seed": 1956,
    "columnDimensions": 0.5,
    "synPermConnected": 0.1,
    "synPermActiveInc": 0.1,
    "synPermInactiveDec": 0.01,
    "boostStrength": 0.0,
  }

  tmParams = {
    "verbosity": 0,
    "columnCount": totalColumns,
    "cellsPerColumn": 16,
    "inputWidth": totalColumns,
    "seed": 1960,
    "temporalImp": "tm_cpp",
    "newSynapseCount": 6,
    "maxSynapsesPerSegment": 11,
    "maxSegmentsPerCell": 32,
    "initialPerm": 0.21,
    "permanenceInc": 0.1,
    "permanenceDec": 0.05,
    "globalDecay": 0.0,
    "maxAge": 0,
    "minThreshold": 3,
    "activationThreshold": 5,
    "outputType": "normal",
  }

  clParams = {
    "implementation": "py",
    "regionName": "SDRClassifierRegion",
    "verbosity": 0,
    "alpha": 0.1,
    "steps": "1",
  }

  modelConfig = {
    "model": "HTMPrediction",
    "version": 1,
    "predictAheadTime": None,
    "modelParams": {
      "inferenceType": "TemporalMultiStep",
      "sensorParams": sensorParams,
      "spEnable": False,
      "spParams": spParams,
      "tmEnable": True,
      "tmParams": tmParams,
      "clParams": clParams,
      "trainSPNetOnlyIfRequested": False,
    },
  }

  model = ModelFactory.create(modelConfig)
  model.enableInference({"predictedField": "token"})
  model.enableLearning()

  # Feed the sentence 20 times. topGuess carries the previous step's best
  # 1-step prediction into the next step, where it is compared with the
  # character actually presented.
  topGuess = None
  for epoch in xrange(20):
    for symbol in sentence:
      # Predictions are only asserted on passes 16-19; the first character of
      # every pass is never checked because topGuess is cleared between passes.
      if epoch > 15 and topGuess is not None:
        self.assertEqual(topGuess, symbol)

      outcome = model.run({"token": symbol})
      oneStepPredictions = outcome.inferences["multiStepPredictions"][1]
      # Rank the (key, value) pairs by value, largest first, and keep the
      # key of the leading pair.
      ranked = sorted(oneStepPredictions.items(),
                      key=itemgetter(1), reverse=True)
      topGuess = ranked[0][0]

    # Start the next pass from a clean sequence state with no pending guess.
    model.resetSequenceStates()
    topGuess = None



if __name__ == "__main__":
unittest.main()
Loading

0 comments on commit c42c8bb

Please sign in to comment.