Skip to content

Commit

Permalink
add rules to skip fingerprinting. Improve memory management (#50)
Browse files Browse the repository at this point in the history
  • Loading branch information
mscasso-scanoss authored Dec 4, 2023
1 parent c9122d3 commit 4249005
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 11 deletions.
2 changes: 1 addition & 1 deletion external/inc/winnowing.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#include <stdint.h>

uint32_t winnowing (char *src, uint32_t *hashes, uint32_t *lines, uint32_t limit, uint8_t *grams, uint32_t *windows);
uint32_t winnowing (char *src, uint32_t *hashes, uint32_t *lines, uint32_t limit);
extern uint8_t GRAM; // Winnowing gram size in bytes
extern uint8_t WINDOW; // Winnowing window size in bytes
extern uint32_t MAX_UINT32;
Expand Down
38 changes: 34 additions & 4 deletions external/src/winnowing.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,23 +82,44 @@ static uint32_t add_hash(uint32_t hash, uint32_t line, uint32_t *hashes, uint32_
"hashes" is filled with hashes and "lines" is filled with the respective line numbers.
The function returns the number of hashes found */

uint32_t winnowing(char *src, uint32_t *hashes, uint32_t *lines, uint32_t limit, uint8_t *grams, uint32_t *windows)
uint32_t winnowing(char *src, uint32_t *hashes, uint32_t *lines, uint32_t limit)
{
uint32_t hash = MAX_UINT32;
uint32_t last = 0;

uint8_t *grams = calloc(limit, 1);
uint32_t *windows = calloc (limit * 4,1);

if (!grams || !windows)
{
free(grams);
free(windows);
return 0;
}

uint8_t *gram = grams;
uint32_t *window = windows;

uint32_t gram_ptr = 0;
uint32_t window_ptr = 0;

/* Process one byte at a time */
uint32_t line = 1;
uint32_t counter = 0;
uint32_t line_char = 0;
while (*src)
{
if (*src == '\n') line++;
if (*src == '\n')
{
line++;
line_char = 0;
}
else
{
line_char++;
}

if (line > 16384)
if (line > 65384 || line_char > 16384)
{
break;
}
Expand All @@ -122,14 +143,23 @@ uint32_t winnowing(char *src, uint32_t *hashes, uint32_t *lines, uint32_t limit,
hash = smaller_hash(window);
last = add_hash(hash, line, hashes, lines, last, &counter);

if (counter >= limit) break;
if (counter >= limit)
break;

window++;
if (window - windows >= limit * 4)
break;

window_ptr = WINDOW - 1;
}
gram++;
if (gram - grams >= limit)
break;
gram_ptr = GRAM - 1;
}
}

free (windows);
free (grams);
return counter;
}
18 changes: 12 additions & 6 deletions src/wfp.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ void extract_wfp(uint8_t *md5, char *src, uint32_t length, bool check_mz)

/* File discrimination check: Binary? */
uint32_t src_ln = strlen(src);
if (length != src_ln) return;

if (length != src_ln || !strchr(src, '\n')) return;

/* Store buffer lengths */
long buffer_ln[256];
Expand All @@ -91,14 +92,20 @@ void extract_wfp(uint8_t *md5, char *src, uint32_t length, bool check_mz)

uint32_t mem_alloc = src_ln > MAX_FILE_SIZE ? src_ln : MAX_FILE_SIZE;

uint8_t *grams = calloc(mem_alloc,1);
uint32_t *windows = calloc (mem_alloc*4,1);
uint8_t *buffer = malloc(WFP_BUFFER_SIZE * 256);
uint32_t *hashes = malloc(mem_alloc);
uint32_t *lines = malloc(mem_alloc);

if (!buffer || !hashes || !lines)
{
free(buffer);
free(hashes);
free(lines);
return;
}

/* Capture hashes (Winnowing) */
uint32_t size = winnowing(src, hashes, lines, mem_alloc, grams, windows);
uint32_t size = winnowing(src, hashes, lines, mem_alloc);

uint8_t n = 0;
uint16_t line = 0;
Expand Down Expand Up @@ -135,11 +142,9 @@ void extract_wfp(uint8_t *md5, char *src, uint32_t length, bool check_mz)
if (buffer_ln[i]) if (!write(out_snippet[i], buffer + (WFP_BUFFER_SIZE * i), buffer_ln[i]))
printf("Warning: error writing snippet sector\n");

free (windows);
free (buffer);
free (hashes);
free (lines);
free (grams);
}

/**
Expand Down Expand Up @@ -174,6 +179,7 @@ void mz_wfp_extract(char *path)

/* Create job structure */
struct mz_job job;
memset(&job, 0, sizeof(job));
strcpy(job.path, path);
memset(job.mz_id, 0, 2);
job.mz = NULL;
Expand Down

0 comments on commit 4249005

Please sign in to comment.