-
Notifications
You must be signed in to change notification settings - Fork 4
/
params.ini
133 lines (102 loc) · 4.06 KB
/
params.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
[General parameters]
# Random seed integer for reproducibility. If 'None', a seed will be randomly
# selected.
rnd_seed = None
# A value of '0' will hide most of the output. A value of '1' will print all
# output to screen.
verbose = 1
# Parallel run flag. If 'False', a single core will be used.
parallel = False
# If 'parallel=True', limit the number of processes to use. If 'None',
# it will be set to the number of available CPU cores minus 1.
processes = None
# Allow 'numpy' multithreading? Enabling numpy's multithreading means that
# the library will use all the CPU cores. This increases performance, but
# uses up all your CPU. If numpy is allowed to multithread, the parallel run
# will be disabled.
numpy_multi = True
[Input file's data columns]
#
# ID : Name of the column containing the IDs. If the file does not have
# an 'ID' column, set to 'None' and one will be automatically
# generated.
# xy_coords : Names of the columns containing the (x, y) coordinates.
# data : Names of the data columns to be analyzed with the clustering
# method.
# uncert : Names of the columns containing the data uncertainties. These are
# only required if the 'resampleFlag' is set to 'True' in the
# 'Outer loop' parameters below. Otherwise, this parameter can be
# commented out.
#
ID = None
xy_coords = _x _y
data = pmRA pmDE Plx
uncert = e_pmRA e_pmDE e_Plx
# Data outlier removal method, one of: stdregion, local, forest
# Masked data will be identified with a '-1' in the final output file.
oultr_method = stdregion
# Number of standard deviations for the 'stdregion' method.
stdRegion_nstd = 5.
[Outer loop]
# Number of outer loop (OL) runs.
OL_runs = 25
# Re-sample using uncertainties?
resampleFlag = False
# Apply PCA dimensionality reduction?
PCAflag = True
# Number of dimensions to keep
PCAdims = 2
# Apply Gaussian-Uniform Mixture Model cleaning?
GUMM_flag = True
# This percentile decides the probability value (obtained by the GUMM) below
# which stars will be marked as non-members. A value closer to '0' improves the
# completeness at the expense of the purity and vice-versa for values closer
# to '100'. Use 'auto' to select the value where the probability curve rises
# above 'P=0.'.
GUMM_perc = auto
# Estimate the densities of the two populations (members and non-members) using
# the classification performed by the clustering algorithm plus the GUMM (if
# activated). From these two probability densities a new per-star probability
# is estimated.
KDEP_flag = True
[Inner loop]
# Maximum number of inner loop runs.
IL_runs = 25
# Number of members per cluster.
N_membs = 25
# Maximum number of clusters allowed.
N_cl_max = 1000
# Clustering method
#
# scikit-learn methods
# See: https://scikit-learn.org/stable/modules/clustering.html for a
# description of all the methods below.
#
# KMeans, MiniBatchKMeans, AffinityPropagation, MeanShift,
# SpectralClustering, AgglomerativeClustering, DBSCAN, OPTICS, Birch,
# GaussianMixture, BayesianGaussianMixture, Voronoi
#
# The 'rkmeans' method uses R's 'kmeans' function, as done in UPMASK.
#
# The 'kNNdens' method is adapted from the method presented in the article
# 'Clustering by fast search and find of density peaks', Rodriguez & Laio (2014).
# This method requires a single outer loop run ('OL_runs') because the results
# will not change with subsequent runs.
#
# The 'Voronoi' method is not really a clustering method but rather a way to
# evenly distribute clusters across the dimensions.
#
clust_method = MiniBatchKMeans
[Clustering parameters]
# Parameters (if any) for the selected clustering method. Only used if one of
# the scikit-learn methods was selected.
# For all the parameters below the format is 'xxx_yyyy', where:
# 'xxx' is the type of the parameter (bool, str, int, float), and
# 'yyyy' is the value of the parameter.
#
#threshold = float_0.1
#compute_labels = bool_True
#n_init = int_50
#cluster_all = bool_False
#bandwidth = float_0.1
#covariance_type = str_tied