Skip to content

Commit

Permalink
Merge pull request #4924 from jtrmal/python_and_apple
Browse files Browse the repository at this point in the history
Fix reported issues w.r.t. Python 2.7 and some Apple silicon quirks
  • Loading branch information
danpovey authored Jul 23, 2024
2 parents 67548a3 + 816d438 commit 6f61393
Show file tree
Hide file tree
Showing 13 changed files with 166 additions and 102 deletions.
4 changes: 3 additions & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,16 @@ endif

# Don't call rm -rf.
rmlibdir:
ifeq ($(KALDI_FLAVOR), dynamic)
ifneq ($(KALDILIBDIR), )
-rm -f $(KALDILIBDIR)/*{.so,.a,.o}
-rm -f $(KALDILIBDIR)/*{.so,.a,.o,.dylib}
-rmdir 2>/dev/null $(KALDILIBDIR); true
else
# KALDILIBDIR might have been unset because of reconfigure. Do a best guess.
@echo "Something seems wrong. Please re-run configure."
@echo "I will continue but the cleanup might not be complete."
endif
endif

kaldi.mk:
@echo "ERROR: kaldi.mk does not exist; run ./configure first.";
Expand Down
2 changes: 2 additions & 0 deletions src/configure
Original file line number Diff line number Diff line change
Expand Up @@ -1150,6 +1150,8 @@ elif [ "`uname`" == "Darwin" ]; then
cat makefiles/darwin_clapack.mk >> kaldi.mk
echo "Warning (CLAPACK): this part of the configure process is not properly tested and may not work."
echo "Successfully configured for Darwin with CLAPACK libs from $CLAPACKROOT"
elif [ "$(uname -m)" == "arm64" ]; then
cat makefiles/darwin_arm64.mk >> kaldi.mk
else
cat makefiles/darwin.mk >> kaldi.mk
fi
Expand Down
15 changes: 6 additions & 9 deletions src/cudamatrix/cu-array.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,12 @@ class CuArrayBase {
protected:
/// Default constructor: make it protected so the user cannot
/// instantiate this class.
CuArrayBase<T>(): data_(NULL), dim_(0) { }
CuArrayBase(): data_(NULL), dim_(0) { }


T *data_; ///< GPU data pointer (if GPU not available,
///< will point to CPU memory).
MatrixIndexT dim_; ///< dimension of the vector

};

/**
Expand All @@ -123,22 +122,21 @@ class CuArrayBase {
template<typename T>
class CuArray: public CuArrayBase<T> {
public:

/// Default constructor; initializes data_ to NULL and dim_ to 0 via the
/// constructor of CuArrayBase.
CuArray<T>() { }
CuArray() { }

/// Constructor with memory initialisation. resize_type may be kSetZero or
/// kUndefined.
explicit CuArray<T>(MatrixIndexT dim, MatrixResizeType resize_type = kSetZero)
explicit CuArray(MatrixIndexT dim, MatrixResizeType resize_type = kSetZero)
{ Resize(dim, resize_type); }

/// Constructor from CPU-based int vector
explicit CuArray<T>(const std::vector<T> &src) { CopyFromVec(src); }
explicit CuArray(const std::vector<T> &src) { CopyFromVec(src); }

/// Copy constructor. We don't make this explicit because we want to be able
/// to create a std::vector<CuArray>.
CuArray<T>(const CuArray<T> &src) { CopyFromArray(src); }
CuArray(const CuArray<T> &src) { CopyFromArray(src); }

/// Destructor
~CuArray() { Destroy(); }
Expand Down Expand Up @@ -172,7 +170,6 @@ class CuArray: public CuArrayBase<T> {
/// I/O
void Read(std::istream &is, bool binary);
void Write(std::ostream &is, bool binary) const;

};


Expand All @@ -182,7 +179,7 @@ class CuSubArray: public CuArrayBase<T> {
/// Constructor as a range of an existing CuArray or CuSubArray. Note: like
/// similar constructors in class CuVector and others, it can be used to evade
/// 'const' constraints; don't do that.
explicit CuSubArray<T>(const CuArrayBase<T> &src,
explicit CuSubArray(const CuArrayBase<T> &src,
MatrixIndexT offset, MatrixIndexT dim);

/// Construct from raw pointers
Expand Down
22 changes: 11 additions & 11 deletions src/cudamatrix/cu-matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ class CuMatrixBase {
template<typename OtherReal>
void CopyFromTp(const CuTpMatrix<OtherReal> &M,
MatrixTransposeType trans = kNoTrans);

// This function will copy from source rows (start_range, end_range].
// If the range is outside of the clamped region, then the clamped
// row will be replicated across the out-of-range areas.
Expand Down Expand Up @@ -307,9 +307,9 @@ class CuMatrixBase {
void PowAbs(const CuMatrixBase<Real> &src, Real power, bool include_sign=false);

void Floor(const CuMatrixBase<Real> &src, Real floor_val);

void Ceiling(const CuMatrixBase<Real> &src, Real ceiling_val);

/// This is equivalent to running:
/// Floor(src, lower_limit);
/// Ceiling(src, upper_limit);
Expand All @@ -320,7 +320,7 @@ class CuMatrixBase {
/// (x < 0 ? exp(x) : x + 1). This function is used
/// in our RNNLM training.
void ExpSpecial(const CuMatrixBase<Real> &src);

/// Softmax nonlinearity
/// Y = Softmax(X) : Yij = e^Xij / sum_k(e^Xik), done to each row,
/// with attention to avoiding overflow or underflow.
Expand All @@ -333,7 +333,7 @@ class CuMatrixBase {
/// Supports in-place operation (i.e. this == &src).
void LogSoftMaxPerRow(const CuMatrixBase<Real> &src);


/// Apply the function y = log(1 + exp(x)), to each element.
/// Note: the derivative of this function is the sigmoid function.
/// This is like a soft ReLU.
Expand Down Expand Up @@ -439,23 +439,23 @@ class CuMatrixBase {
this -> Pow(*this, power);
};


inline void ApplyPowAbs(Real power, bool include_sign=false) {
this -> PowAbs(*this, power, include_sign);
};

inline void ApplyHeaviside() {
this -> Heaviside(*this);
};

inline void ApplyFloor(Real floor_val) {
this -> Floor(*this, floor_val);
};

inline void ApplyCeiling(Real ceiling_val) {
this -> Ceiling(*this, ceiling_val);
};

inline void ApplyExp() {
this -> Exp(*this);
};
Expand Down Expand Up @@ -924,7 +924,7 @@ class CuSubMatrix: public CuMatrixBase<Real> {

/// This type of constructor is needed for Range() to work [in CuMatrix base
/// class]. Cannot make it explicit or that breaks.
inline CuSubMatrix<Real> (const CuSubMatrix &other):
inline CuSubMatrix(const CuSubMatrix &other):
CuMatrixBase<Real> (other.data_, other.num_rows_, other.num_cols_,
other.stride_) {}
private:
Expand Down
16 changes: 8 additions & 8 deletions src/cudamatrix/cu-tp-matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,18 +48,18 @@ class CuTpMatrix : public CuPackedMatrix<Real> {
CuTpMatrix() : CuPackedMatrix<Real>() {}
explicit CuTpMatrix(MatrixIndexT r, MatrixResizeType resize_type = kSetZero)
: CuPackedMatrix<Real>(r, resize_type) {}
explicit CuTpMatrix<Real>(const TpMatrix<Real> &orig)

explicit CuTpMatrix(const TpMatrix<Real> &orig)
: CuPackedMatrix<Real>(orig) {}
// This constructor lacks the "explicit" keyword so that
// we can include this class in std::vector.
CuTpMatrix<Real>(const CuTpMatrix<Real> &orig)
CuTpMatrix(const CuTpMatrix<Real> &orig)
: CuPackedMatrix<Real>(orig) {}
explicit CuTpMatrix<Real>(const CuMatrixBase<Real> &orig,

explicit CuTpMatrix(const CuMatrixBase<Real> &orig,
MatrixTransposeType trans = kNoTrans);


~CuTpMatrix() {}

void CopyFromMat(const CuMatrixBase<Real> &M,
Expand All @@ -70,12 +70,12 @@ class CuTpMatrix : public CuPackedMatrix<Real> {
}
void CopyFromTp(const TpMatrix<Real> &other) {
CuPackedMatrix<Real>::CopyFromPacked(other);
}
}
void Cholesky(const CuSpMatrix<Real>& Orig);
void Invert();

CuTpMatrix<Real> &operator = (const CuTpMatrix<Real> &in);

protected:
inline const TpMatrix<Real> &Mat() const {
return *(reinterpret_cast<const TpMatrix<Real>* >(this));
Expand Down
2 changes: 1 addition & 1 deletion src/cudamatrix/cu-vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ class CuVectorBase {

/// Default constructor: make it protected so the user cannot
/// instantiate this class.
CuVectorBase<Real>(): data_(NULL), dim_(0) { }
CuVectorBase(): data_(NULL), dim_(0) { }

Real *data_; ///< GPU data pointer (or regular data pointer
///< if CUDA is not compiled in or we have no GPU).
Expand Down
36 changes: 36 additions & 0 deletions src/makefiles/darwin_arm64.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Darwin (macOS) configuration for arm64 machines.
#
# src/configure appends this fragment to kaldi.mk when `uname -m` reports
# arm64. It defines the compiler flags (CXXFLAGS) and linker settings
# (LDFLAGS, LDLIBS) used for the build.

# Fail early if the variables this fragment depends on are not defined;
# they are expected to have been set before this fragment is read.
ifndef DOUBLE_PRECISION
$(error DOUBLE_PRECISION not defined.)
endif
ifndef OPENFSTINC
$(error OPENFSTINC not defined.)
endif
ifndef OPENFSTLIBS
$(error OPENFSTLIBS not defined.)
endif

# Base compiler flags: C++14, include paths for the source tree and OpenFst,
# warning configuration, the floating-point precision selector and the
# HAVE_* feature macros. EXTRA_CXXFLAGS lets the caller inject extra options.
# NOTE(review): optimization is fixed at -O1 here; the reason is not visible
# in this file -- confirm before raising it.
# NOTE(review): -DHAVE_CLAPACK is paired with "-framework Accelerate" in
# LDLIBS below; presumably Accelerate supplies the CLAPACK interface -- confirm.
# The trailing "# -O0 -DKALDI_PARANOID" is a commented-out debugging
# alternative and is not part of the flags.
CXXFLAGS = -std=c++14 -I.. -I$(OPENFSTINC) -O1 $(EXTRA_CXXFLAGS) \
-Wall -Wno-sign-compare -Wno-unused-local-typedefs \
-Wno-deprecated-declarations -Winit-self \
-DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \
-DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_CLAPACK \
-pthread \
-g # -O0 -DKALDI_PARANOID

# Builds with KALDI_FLAVOR set to "dynamic" additionally compile with -fPIC.
ifeq ($(KALDI_FLAVOR), dynamic)
CXXFLAGS += -fPIC
endif

# Compiler specific flags
# The compiler is identified by searching the output of `$(CXX) -v` for the
# substrings "clang" or "GCC"; if neither is found, no extra flags are added.
COMPILER = $(shell $(CXX) -v 2>&1)
ifeq ($(findstring clang,$(COMPILER)),clang)
# Suppress annoying clang warnings that are perfectly valid per spec.
CXXFLAGS += -Wno-mismatched-tags
else ifeq ($(findstring GCC,$(COMPILER)),GCC)
# Allow implicit conversions between vectors.
CXXFLAGS += -flax-vector-conversions
endif

# Linker settings: caller-supplied extras and the OpenFst flags/libraries,
# plus the Accelerate framework and the math, pthread and dl libraries.
LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -g
LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) -framework Accelerate -lm -lpthread -ldl
4 changes: 2 additions & 2 deletions src/matrix/kaldi-matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -1006,11 +1006,11 @@ class SubMatrix : public MatrixBase<Real> {
MatrixIndexT num_cols,
MatrixIndexT stride);

~SubMatrix<Real>() {}
~SubMatrix() {}

/// This type of constructor is needed for Range() to work [in Matrix base
/// class]. Cannot make it explicit.
SubMatrix<Real> (const SubMatrix &other):
SubMatrix(const SubMatrix &other):
MatrixBase<Real> (other.data_, other.num_cols_, other.num_rows_,
other.stride_) {}

Expand Down
26 changes: 13 additions & 13 deletions src/matrix/qr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ void House(MatrixIndexT dim, const Real *x, Real *v, Real *beta) {
if (max_x == 0.0) max_x = 1.0;
s = 1.0 / max_x;
}
Real sigma = 0.0;
v[0] = 1.0;
for (MatrixIndexT i = 1; i < dim; i++) {
Expand All @@ -73,7 +73,7 @@ void House(MatrixIndexT dim, const Real *x, Real *v, Real *beta) {
v[0] = x1 - mu;
} else {
v[0] = -sigma / (x1 + mu);
KALDI_ASSERT(KALDI_ISFINITE(v[dim-1]));
KALDI_ASSERT(KALDI_ISFINITE(v[dim-1]));
}
Real v1 = v[0];
Real v1sq = v1 * v1;
Expand Down Expand Up @@ -155,11 +155,11 @@ void HouseBackward(MatrixIndexT dim, const Real *x, Real *v, Real *beta) {
with packed lower-triangular matrices to do it this way. There's also
a shift from one-based to zero-based indexing, so the index
k is transformed k -> n - k, and a corresponding transpose...
Let the original *this be A. This algorithm replaces *this with
a tridiagonal matrix T such that T = Q A Q^T for an orthogonal Q.
Caution: Q is transposed vs. Golub and Van Loan.
If Q != NULL it outputs Q.
If Q != NULL it outputs Q.
*/
template<typename Real>
void SpMatrix<Real>::Tridiagonalize(MatrixBase<Real> *Q) {
Expand Down Expand Up @@ -195,7 +195,7 @@ void SpMatrix<Real>::Tridiagonalize(MatrixBase<Real> *Q) {
if (Q != NULL) { // C.f. Golub, Q is H_1 .. H_n-2... in this
// case we apply them in the opposite order so it's H_n-1 .. H_1,
// but also Q is transposed so we really have Q = H_1 .. H_n-1.
// It's a double negative.
// It's a double negative.
// Anyway, we left-multiply Q by each one. The H_n would each be
// diag(I + beta v v', I) but we don't ever touch the last dims.
// We do (in Matlab notation):
Expand Down Expand Up @@ -309,7 +309,7 @@ void QrStep(MatrixIndexT n,
if (k < n-2) {
// Next is the elements (k+2, k) and (k+2, k-1), to be rotated, again
// backwards.
Real &elem_kp2_k = z,
Real &elem_kp2_k = z,
&elem_kp2_kp1 = off_diag[k+1];
// Note: elem_kp2_k == z would start off as zero because it's
// two off the diagonal, and not been touched yet. Therefore
Expand Down Expand Up @@ -338,7 +338,7 @@ void QrInternal(MatrixIndexT n,
MatrixIndexT counter = 0, max_iters = 500 + 4*n, // Should never take this many iters.
large_iters = 100 + 2*n;
Real epsilon = (pow(2.0, sizeof(Real) == 4 ? -23.0 : -52.0));

for (; counter < max_iters; counter++) { // this takes the place of "until
// q=n"... we'll break out of the
// loop when we converge.
Expand All @@ -356,7 +356,7 @@ void QrInternal(MatrixIndexT n,
off_diag[i] = 0.0;
}
// The next code works out p, q, and npq which is n - p - q.
// For the definitions of q and p, see Golub and Van Loan; we
// For the definitions of q and p, see Golub and Van Loan; we
// partition the n dims into pieces of size (p, n-p-q, q) where
// the part of size q is diagonal and the part of size n-p-q is
// "unreduced", i.e. has no zero off-diagonal elements.
Expand Down Expand Up @@ -392,7 +392,7 @@ void QrInternal(MatrixIndexT n,
} else {
QrStep(npq, diag + p, off_diag + p,
static_cast<MatrixBase<Real>*>(NULL));
}
}
}
if (counter == max_iters) {
KALDI_WARN << "Failure to converge in QR algorithm. "
Expand Down Expand Up @@ -490,7 +490,7 @@ void SpMatrix<Real>::TopEigs(VectorBase<Real> *s, MatrixBase<Real> *P,
r.AddSpVec(1.0, S, Q.Row(d), 0.0);
// r = S * q_d
MatrixIndexT counter = 0;
Real end_prod;
Real end_prod = 0;
while (1) { // Normally we'll do this loop only once:
// we repeat to handle cases where r gets very much smaller
// and we want to orthogonalize again.
Expand Down Expand Up @@ -528,11 +528,11 @@ void SpMatrix<Real>::TopEigs(VectorBase<Real> *s, MatrixBase<Real> *P,
}
}

Matrix<Real> R(lanczos_dim, lanczos_dim);
Matrix<Real> R(lanczos_dim, lanczos_dim);
R.SetUnit();
T.Qr(&R); // Diagonalizes T.
Vector<Real> s_tmp(lanczos_dim);
s_tmp.CopyDiagFromSp(T);
s_tmp.CopyDiagFromSp(T);

// Now T = R * diag(s_tmp) * R^T.
// The next call sorts the elements of s from greatest to least absolute value,
Expand All @@ -544,7 +544,7 @@ void SpMatrix<Real>::TopEigs(VectorBase<Real> *s, MatrixBase<Real> *P,
SubMatrix<Real> Rsub(R, 0, eig_dim, 0, lanczos_dim);
SubVector<Real> s_sub(s_tmp, 0, eig_dim);
s->CopyFromVec(s_sub);

// For working out what to do now, just assume the other eigenvalues were
// zero. This is just for purposes of knowing how to get the result, and
// not getting things wrongly transposed.
Expand Down
Loading

0 comments on commit 6f61393

Please sign in to comment.