version bump + forgotten file on m7

djm34 · Feb 25, 2017 · f01177d · f01177d
1 parent 733dd67
commit f01177d
Show file tree

Hide file tree

Showing 2 changed files with 131 additions and 2 deletions.
diff --git a/compat/ccminer-config.h b/compat/ccminer-config.h
@@ -161,10 +161,10 @@
 #define PACKAGE_NAME "ccminer"
 
 /* Define to the home page for this package. */
-#define PACKAGE_URL "http://github.com/tpruvot/ccminer"
+#define PACKAGE_URL "http://github.com/djm34/ccminer-msvc2015"
 
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "0.0.1-djm34"
+#define PACKAGE_VERSION "0.0.2-djm34"
 
 /* If using the C implementation of alloca, define if you know the
    direction of stack growth for your system; otherwise it will be

diff --git a/x17/cuda_x17_haval256.cu b/x17/cuda_x17_haval256.cu
@@ -31,6 +31,7 @@
 #include <memory.h>
 
 #include "cuda_helper.h"
+__constant__ uint32_t c_PaddedMessage80[32]; // 32 words (128 bytes) despite the "80" in the name; written by haval256_setBlock_120, read by m7_haval256_gpu_hash_120
 
 #define F1(x6, x5, x4, x3, x2, x1, x0) \
 	(((x1) & ((x0) ^ (x4))) ^ ((x2) & (x5)) ^ ((x3) & (x6)) ^ (x0))
@@ -334,6 +335,108 @@ void x17_haval256_gpu_hash_64(const uint32_t threads, uint32_t startNounce, uint
 	}
 }
 
+
+__global__ void m7_haval256_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash)
+{
+	// Each thread with index below `threads` processes two 32-word blocks (constant data with its
+	// own nonce in word 29, then a trailer block) and stores four 64-bit result words in outputHash.
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x); // flat 1D global thread index
+	if (thread < threads) // threads beyond the requested count do nothing
+	{
+		// One nonce per thread, counted up from startNounce.
+		uint32_t nounce = startNounce + thread;
+		// Result staging area: written as 32-bit words (h4), read back as 64-bit words (h8).
+		union {
+			uint32_t h4[16];
+			uint64_t h8[8];
+		} hash;
+		// Working message block of 32 words.
+		uint32_t buf[32];
+		// s0..s7 and u0..u7 start from the same eight constants; u0..u7 are added back into s0..s7 after the passes.
+		uint32_t s0, s1, s2, s3, s4, s5, s6, s7;
+		 uint32_t u0 = s0 = 0x243F6A88;
+		 uint32_t u1 = s1 = 0x85A308D3;
+		 uint32_t u2 = s2 = 0x13198A2E;
+		 uint32_t u3 = s3 = 0x03707344;
+		 uint32_t u4 = s4 = 0xA4093822;
+		 uint32_t u5 = s5 = 0x299F31D0;
+		 uint32_t u6 = s6 = 0x082EFA98;
+		 uint32_t u7 = s7 = 0xEC4E6C89;
+		// NOTE(review): PASS1..PASS5 are macros defined outside this hunk; presumably they update s0..s7 by name - confirm.
+		// First block: words 0..28 come from constant memory, words 29..31 are set per thread below.
+#pragma unroll 29
+		for (int i = 0; i<29; i++) {
+			buf[i] = c_PaddedMessage80[i];
+		}
+		buf[29] = nounce;
+		buf[30] = c_PaddedMessage80[30] + 0x00010000;  // original author's note: "need to fix that"; NOTE(review): presumably sets the padding bit right after the data - confirm
+		buf[31] = 0;
+		// Run the five pass macros over the first block.
+		PASS1(5, buf);
+		PASS2(5, buf);
+		PASS3(5, buf);
+		PASS4(5, buf);
+		PASS5(5, buf);
+		// Add the saved copy u0..u7 into s0..s7, then store the sums back
+		// into u0..u7 so they can be added again after the second block.
+		s0 = (s0 + u0);
+		s1 = (s1 + u1);
+		s2 = (s2 + u2);
+		s3 = (s3 + u3);
+		s4 = (s4 + u4);
+		s5 = (s5 + u5);
+		s6 = (s6 + u6);
+		s7 = (s7 + u7);
+		u0 = s0;
+		u1 = s1;
+		u2 = s2;
+		u3 = s3;
+		u4 = s4;
+		u5 = s5;
+		u6 = s6;
+		u7 = s7;
+		// Second block: every word is zero except words 29 and 30.
+		// NOTE(review): 0x3d0 = 976 = 122 * 8, presumably the message length in bits; 0x40290000 is presumably HAVAL's version/passes/output-size field - confirm.
+		/////////////////////
+#pragma unroll 32
+		for (int i = 0; i<32; i++) { buf[i] = 0; }
+
+		buf[29] = 0x40290000;
+		buf[30] = 0x000003d0;
+
+
+		// Run the same five pass macros over the second block.
+		PASS1(5, buf);
+		PASS2(5, buf);
+		PASS3(5, buf);
+		PASS4(5, buf);
+		PASS5(5, buf);
+
+		// Add the values saved after the first block into s0..s7 once more.
+		s0 = (s0 + u0);
+		s1 = (s1 + u1);
+		s2 = (s2 + u2);
+		s3 = (s3 + u3);
+		s4 = (s4 + u4);
+		s5 = (s5 + u5);
+		s6 = (s6 + u6);
+		s7 = (s7 + u7);
+		////////////////////
+		hash.h4[0] = s0;
+		hash.h4[1] = s1;
+		hash.h4[2] = s2;
+		hash.h4[3] = s3;
+		hash.h4[4] = s4;
+		hash.h4[5] = s5;
+		hash.h4[6] = s6;
+		hash.h4[7] = s7;
+		// h8[0..3] overlay h4[0..7]; 64-bit word i of this thread's result goes to outputHash[i * threads + thread].
+#pragma unroll 4
+		for (int i = 0; i<4; i++) { outputHash[i*threads + thread] = hash.h8[i]; }
+	} // threads
+}
+
+
 __host__
 void x17_haval256_cpu_init(int thr_id, uint32_t threads)
 {
@@ -351,3 +454,29 @@ void x17_haval256_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce
 
 	//MyStreamSynchronize(NULL, order, thr_id);
 }
+
+__host__ void haval256_setBlock_120(void *pdata)
+{
+	// Stage the first 122 bytes of pdata in a zero-filled 128-byte buffer (32 words)
+	// and upload the whole buffer to the c_PaddedMessage80 constant array.
+	unsigned char staged[128] = { 0 };
+	memcpy(staged, pdata, 122);
+	cudaMemcpyToSymbol(c_PaddedMessage80, staged, sizeof(staged), 0, cudaMemcpyHostToDevice);
+}
+
+__host__ void m7_haval256_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
+{
+	// Launches m7_haval256_gpu_hash_120 with one GPU thread per nonce (threads nonces counted up
+	// from startNounce, results in d_outputHash), then calls MyStreamSynchronize(NULL, order, thr_id).
+	const int threadsperblock = 256; // alignment with the mixtob size. DO NOT CHANGE
+
+	// Number of thread blocks needed, rounded up: a plain division would drop the last
+	// threads % 256 nonces. The kernel ignores the surplus threads of the final block.
+	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
+	dim3 block(threadsperblock);
+	size_t shared_size = 0;
+
+	if (threads > 0) // a grid of zero blocks is an invalid launch configuration
+		m7_haval256_gpu_hash_120<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash);
+	MyStreamSynchronize(NULL, order, thr_id);
+}