Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

minor changes to twoPointsCrossovers #1723

Merged
merged 15 commits into from
Dec 9, 2021
46 changes: 21 additions & 25 deletions framework/Optimizers/crossOverOperators/crossovers.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ def onePointCrossover(parents,**kwargs):
nParents,nGenes = np.shape(parents)
# Number of children = 2* (nParents choose 2)
children = xr.DataArray(np.zeros((int(2*comb(nParents,2)),nGenes)),
dims=['chromosome','Gene'],
coords={'chromosome': np.arange(int(2*comb(nParents,2))),
'Gene':kwargs['variables']})
dims=['chromosome','Gene'],
coords={'chromosome': np.arange(int(2*comb(nParents,2))),
'Gene':kwargs['variables']})


# defaults
Expand Down Expand Up @@ -106,7 +106,7 @@ def uniformCrossover(parents,**kwargs):
return children


def twoPointsCrossover(parents, parentIndexes,**kwargs):
def twoPointsCrossover(parents, **kwargs):
"""
Method designed to perform a two point crossover on 2 parents:
Partition each parents in three sequences (A,B,C):
Expand All @@ -116,7 +116,6 @@ def twoPointsCrossover(parents, parentIndexes,**kwargs):
children1 = A1 B2 C1
children2 = A2 B1 C2
@ In, parents, xr.DataArray, parents involved in the mating process
@ In, parentIndexes, list, list containing pairs of parents
@ In, kwargs, dict, dictionary of parameters for this mutation method:
parents, 2D array, parents in the current mating process.
Shape is nParents x len(chromosome) i.e, number of Genes/Vars
Expand All @@ -129,23 +128,20 @@ def twoPointsCrossover(parents, parentIndexes,**kwargs):
dims=['chromosome','Gene'],
coords={'chromosome': np.arange(int(2*comb(nParents,2))),
'Gene':parents.coords['Gene'].values})
parentPairs = list(combinations(parents,2))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we control here the total number of pairs to be considered?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here, you are using combinations, which assumes the first dim of parents is the parent index. Please add more details in the docstring to make this clearer, i.e., indicate that the first dim is the parent index.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that parentIndexes is not used anymore, the docstring indicates how parents is supposed to be structured

index = 0
for couples in parentIndexes:
locRangeList = list(range(0,nGenes))
index1 = randomUtils.randomIntegers(0, len(locRangeList), caller=None, engine=None)
loc1 = locRangeList[index1]
locRangeList.pop(loc1)
index2 = randomUtils.randomIntegers(0, len(locRangeList), caller=None, engine=None)
loc2 = locRangeList[index2]
if loc1>loc2:
locL=loc2
locU=loc1
elif loc1<loc2:
if nGenes<=2:
ValueError('In Two point Crossover the number of genes should be >=3!')
for couples in parentPairs:
[loc1,loc2] = randomUtils.randomChoice(list(range(1,nGenes)), size=2, replace=False, engine=None)
if loc1 > loc2:
locL = loc2
locU = loc1
else:
locL=loc1
locU=loc2

parent1 = parents[couples[0]].values
parent2 = parents[couples[1]].values
parent1 = couples[0]
parent2 = couples[1]
children1,children2 = twoPointsCrossoverMethod(parent1,parent2,locL,locU)

children[index] = children1
Expand Down Expand Up @@ -187,14 +183,14 @@ def twoPointsCrossoverMethod(parent1,parent2,locL,locU):
@ Out, children1: first generated array
@ Out, children2: second generated array
"""
children1 = parent1
children2 = parent2
children1 = parent1.copy(deep=True)
children2 = parent2.copy(deep=True)

seqB1 = parent1.values[locL:locU+1]
seqB2 = parent2.values[locL:locU+1]
seqB1 = parent1.values[locL:locU]
seqB2 = parent2.values[locL:locU]

children1[locL:locU+1] = seqB2
children2[locL:locU+1] = seqB1
children1[locL:locU] = seqB2
children2[locL:locU] = seqB1
return children1,children2

def uniformCrossoverMethod(parent1,parent2,crossoverProb):
Expand Down
14 changes: 9 additions & 5 deletions framework/Optimizers/parentSelectors/parentSelectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,9 @@ def tournamentSelection(population,**kwargs):
if not multiObjectiveRanking: # single-objective implementation of tournamentSelection
for i in range(nParents):
if matrixOperation[2*i,1] > matrixOperation[2*i+1,1]:
index = int(matrixOperation[i,0])
index = int(matrixOperation[2*i,0])
else:
index = int(matrixOperation[i+1,0])
index = int(matrixOperation[2*i+1,0])
Comment on lines +127 to +129
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you explain the changes from i to 2i, and i+1 to 2i+1?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this was a bug, we are filling the population 2 children at a time, hence we need to populate it using the 2i and 2i+1 indexes

selectedParent[i,:] = pop.values[index,:]
else: # multi-objective implementation of tournamentSelection
for i in range(nParents-1):
Expand All @@ -147,8 +147,8 @@ def tournamentSelection(population,**kwargs):
def rankSelection(population,**kwargs):
"""
Rank Selection mechanism for parent selection

@ In, population, xr.DataArray, populations containing all chromosomes (individuals) candidate to be parents, i.e. population.values.shape = populationSize x nGenes.
@ In, population, xr.DataArray, populations containing all chromosomes (individuals) candidate to be parents,
i.e. population.values.shape = populationSize x nGenes.
@ In, kwargs, dict, dictionary of parameters for this mutation method:
fitness, np.array, fitness of each chromosome (individual) in the population, i.e., np.shape(fitness) = 1 x populationSize
nParents, int, number of required parents.
Expand All @@ -166,7 +166,11 @@ def rankSelection(population,**kwargs):
dataOrderedByIncreasingPos = dataOrderedByDecreasingFitness[:,dataOrderedByDecreasingFitness[1].argsort()]
orderedRank = dataOrderedByIncreasingPos[0,:]

selectedParent = rouletteWheel(population, fitness=orderedRank , nParents=kwargs['nParents'],variables=kwargs['variables'])
rank = xr.DataArray(orderedRank,
dims=['chromosome'],
coords={'chromosome': np.arange(np.shape(orderedRank)[0])})

selectedParent = rouletteWheel(population, fitness=rank , nParents=kwargs['nParents'],variables=kwargs['variables'])

return selectedParent

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ def fitnessBased(newRlz,**kwargs):
dims=['chromosome'],
coords={'chromosome':np.arange(np.shape(newFitness)[0])})

#return newPopulationArray,newFitness,newAge
return newPopulationArray,newFitness,newAge,kwargs['popObjectiveVal']

__survivorSelectors = {}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
<?xml version="1.0" ?>
<Simulation verbosity="debug">
<TestInfo>
<name>framework/Optimizers/GA.MaxwoReplacemenTwoPointsCrossovert</name>
<author>MohammadAbdo</author>
<created>2020-05-16</created>
<classesTested>GeneticAlgorithm</classesTested>
<description>
This test assesses the Genetic algorithm using the weighted sum found in myLocalSum.py function.
The nominal dimensionality of the test problem is 3.
The objective variable is ans. The problem in unconstrained, it is a maximization problem, and the sampling is from discrete variables without replacement.
The crossover mechanism used is the twoPointsCrossover algorithm.
Comment on lines +9 to +12
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you update the description to reflect the fix proposed in this PR?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fix in this PR is to make TwoPointsCrossover work, as it wasn't working before, and this is the first test on it; that's why the last line is added. Can you suggest what I should update the description to?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm looking for the explanation about failure, and the modifications for the fix @Jimmy-INL

</description>
<analytic>
This test uses myLocalSum's analytic objective function.
</analytic>
</TestInfo>

<RunInfo>
<WorkingDir>MaxwoReplacementTwoPointsCrossover</WorkingDir>
<Sequence>optimize, print</Sequence>
</RunInfo>

<Steps>
<MultiRun name="optimize" re-seeding="2286">
<Input class="DataObjects" type="PointSet">placeholder</Input>
<Model class="Models" type="ExternalModel">myLocalSum</Model>
<Optimizer class="Optimizers" type="GeneticAlgorithm">GAopt</Optimizer>
<SolutionExport class="DataObjects" type="PointSet">opt_export</SolutionExport>
<Output class="DataObjects" type="PointSet">optOut</Output>
</MultiRun>
<IOStep name="print">
<Input class="DataObjects" type="PointSet">opt_export</Input>
<Input class="DataObjects" type="PointSet">optOut</Input>
<Output class="OutStreams" type="Print">opt_export</Output>
<Output class="OutStreams" type="Print">optOut</Output>
</IOStep>
</Steps>

<Distributions>
<UniformDiscrete name='uniform_dist_Repl_1'>
<lowerBound>1</lowerBound>
<upperBound>6</upperBound>
<strategy>withReplacement</strategy>
</UniformDiscrete>

<UniformDiscrete name='uniform_dist_woRepl_1'>
<lowerBound>1</lowerBound>
<upperBound>6</upperBound>
<strategy>withoutReplacement</strategy>
</UniformDiscrete>
</Distributions>

<Optimizers>
<GeneticAlgorithm name="GAopt">
<samplerInit>
<limit>20</limit>
<initialSeed>42</initialSeed>
<writeSteps>every</writeSteps>
<type>max</type>
</samplerInit>

<GAparams>
<populationSize>20</populationSize>
<parentSelection>rouletteWheel</parentSelection>
<reproduction>
<crossover type="twoPointsCrossover">
<crossoverProb>0.8</crossoverProb>
</crossover>
<mutation type="swapMutator">
<mutationProb>0.9</mutationProb>
</mutation>
</reproduction>
<fitness type="invLinear">
<a>2.0</a>
<b>1.0</b>
</fitness>
<survivorSelection>fitnessBased</survivorSelection>
</GAparams>

<convergence>
<objective>-1</objective>
</convergence>
Comment on lines +81 to +83
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have checked several definitions in the user manual, for GA (see attached picture), it seems to me the value should be always positive, in addition, we should use InputData to check it. @Jimmy-INL @mandd

image

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@wangcj05, that's what I am saying. The algorithm will converge when $Objective = \epsilon^{obj}$. If I don't want to use the objective convergence and want the algorithm to carry on till it reaches the iteration limit, I select $\epsilon^{obj}$ to be something impossible. For this problem, the obj is always positive, so I picked -1 so that conv_obj is always False (0). If I couldn't explain myself, let's discuss this face to face. Thanks.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed, the convergence on objective is not consistent across different optimizations, and it can be confusing, a new issue has been opened #1729


<variable name="x1">
<distribution>uniform_dist_woRepl_1</distribution>
</variable>

<variable name="x2">
<distribution>uniform_dist_woRepl_1</distribution>
</variable>

<variable name="x3">
<distribution>uniform_dist_woRepl_1</distribution>
</variable>

<variable name="x4">
<distribution>uniform_dist_woRepl_1</distribution>
</variable>

<variable name="x5">
<distribution>uniform_dist_woRepl_1</distribution>
</variable>

<variable name="x6">
<distribution>uniform_dist_woRepl_1</distribution>
</variable>

<objective>ans</objective>
<TargetEvaluation class="DataObjects" type="PointSet">optOut</TargetEvaluation>
<Sampler class="Samplers" type="MonteCarlo">MC_samp</Sampler>
</GeneticAlgorithm>
</Optimizers>

<Samplers>
<MonteCarlo name="MC_samp">
<samplerInit>
<limit>20</limit>
<initialSeed>20021986</initialSeed>
</samplerInit>
<variable name="x1">
<distribution>uniform_dist_woRepl_1</distribution>
</variable>
<variable name="x2">
<distribution>uniform_dist_woRepl_1</distribution>
</variable>
<variable name="x3">
<distribution>uniform_dist_woRepl_1</distribution>
</variable>
<variable name="x4">
<distribution>uniform_dist_woRepl_1</distribution>
</variable>
<variable name="x5">
<distribution>uniform_dist_woRepl_1</distribution>
</variable>
<variable name="x6">
<distribution>uniform_dist_woRepl_1</distribution>
</variable>
</MonteCarlo>
</Samplers>

<Models>
<ExternalModel ModuleToLoad="../../../../../AnalyticModels/optimizing/myLocalSum.py" name="myLocalSum" subType="">
<inputs>x1,x2,x3,x4,x5,x6,ans</inputs>
<outputs>x1,x2,x3,x4,x5,x6,ans</outputs>
</ExternalModel>
</Models>

<DataObjects>
<PointSet name="placeholder"/>
<PointSet name="optOut">
<Input>x1,x2,x3,x4,x5,x6</Input>
<Output>ans</Output>
</PointSet>
<PointSet name="opt_export">
<Input>trajID</Input>
<Output>x1,x2,x3,x4,x5,x6,ans,age,batchId,fitness,iteration,accepted,conv_objective</Output>
</PointSet>
</DataObjects>

<OutStreams>
<Print name="optOut">
<type>csv</type>
<source>optOut</source>
</Print>
<Print name="opt_export">
<type>csv</type>
<source>opt_export</source>
<clusterLabel>trajID</clusterLabel>
</Print>
</OutStreams>
</Simulation>
Loading