Skip to content

Commit

Permalink
update hash calculation to the new format
Browse files Browse the repository at this point in the history
  • Loading branch information
core software devel committed Sep 9, 2024
1 parent 1b3d596 commit 634aaad
Show file tree
Hide file tree
Showing 17 changed files with 71 additions and 67 deletions.
2 changes: 1 addition & 1 deletion inc/decrypt.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "scanoss.h"

extern char * (*decrypt_data) (uint8_t *data, uint32_t size, struct ldb_table table, uint8_t *key, uint8_t *subkey);
extern void (*decrypt_mz) (uint8_t *data, uint32_t len);
extern void (*decrypt_mz) (int key_ln, uint8_t *data, uint32_t len);
extern void (*encoder_version) (char * version);

char * standalone_decrypt_data(uint8_t *data, uint32_t size,struct ldb_table table, uint8_t *key, uint8_t *subkey);
Expand Down
2 changes: 0 additions & 2 deletions inc/scanoss.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@
#define SNIPPET_LINE_TOLERANCE 10

#define WFP_LN 4
#define WFP_REC_LN 18

/* Log files */
#define SCANOSS_VERSION "5.5.0"
#define SCAN_LOG "/tmp/scanoss_scan.log"
Expand Down
5 changes: 1 addition & 4 deletions inc/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,11 @@ void uint32_reverse(uint8_t *data);
void hex_to_bin(char *hex, uint32_t len, uint8_t *out);

/* Compares two hashes of length hash_len */
bool md5cmp(uint8_t *md51, uint8_t *md52);
bool hashcmp(int hash_len, uint8_t *md51, uint8_t *md52);

/* Trim str */
void trim(char *str);

/* Returns the pair md5 of "component/vendor" */
void vendor_component_md5(char *component, char *vendor, uint8_t *out);

/* Returns the current date stamp */
char *datestamp(void);

Expand Down
4 changes: 2 additions & 2 deletions src/attributions.c
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ bool check_purl_attributions(struct ldb_table oss_attributions, char * licenses_
{
/* Get purl md5 */
uint8_t md5[16];
MD5((uint8_t *)purl, strlen(purl), md5);
oss_attribution.hash_calc((uint8_t *)purl, strlen(purl), md5);
if (declared_components[i].license && licenses_json &&
license_search_on_licenses_json(declared_components[i].license, licenses_json))
{
Expand Down Expand Up @@ -269,7 +269,7 @@ void print_purl_attribution_notices(struct ldb_table oss_attributions, char * li
{
/* Get purl md5 */
uint8_t md5[16];
MD5((uint8_t *)purl, strlen(purl), md5);
oss_attribution.hash_calc((uint8_t *)purl, strlen(purl), md5);
print_notices(oss_attributions, md5, purl);
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/component.c
Original file line number Diff line number Diff line change
Expand Up @@ -292,14 +292,14 @@ bool component_date_comparation(component_data_t *a, component_data_t *b)
if (!a->purls_md5[0] && a->purls[0])
{
a->purls_md5[0] = malloc(oss_url.key_ln);
MD5((uint8_t *)a->purls[0], strlen(a->purls[0]), a->purls_md5[0]);
oss_purl.hash_calc((uint8_t *)a->purls[0], strlen(a->purls[0]), a->purls_md5[0]);
a->age = get_component_age(a->purls_md5[0]);
}

if (!b->purls_md5[0] && b->purls[0])
{
b->purls_md5[0] = malloc(oss_purl.key_ln);
MD5((uint8_t *)b->purls[0], strlen(b->purls[0]), b->purls_md5[0]);
oss_purl.hash_calc((uint8_t *)b->purls[0], strlen(b->purls[0]), b->purls_md5[0]);
b->age = get_component_age(b->purls_md5[0]);
}

Expand Down
4 changes: 2 additions & 2 deletions src/decrypt.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
#include "decrypt.h"

char * (*decrypt_data) (uint8_t *data, uint32_t size, struct ldb_table table, uint8_t *key, uint8_t *subkey);
void (*decrypt_mz) (uint8_t *data, uint32_t len);
void (*decrypt_mz) (int key_ln, uint8_t *data, uint32_t len);
void (*encoder_version) (char * version);
/**
* @brief Decrypt data function pointer. Will be executed for the ldb_fetch_recordset function in each iteration. See LDB documentation for more details.
Expand All @@ -51,7 +51,7 @@ char * standalone_decrypt_data(uint8_t *data, uint32_t size, struct ldb_table ta
char * msg = NULL;

if (!strcmp(table.table, "file"))
msg = strndup((char*) data + 16, size - 16);
msg = strndup((char*) data + table.key_ln, size - table.key_ln);
else
msg = strndup((char*) data, size);

Expand Down
6 changes: 3 additions & 3 deletions src/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -141,15 +141,15 @@ void get_file_md5(char *filepath, uint8_t *md5_result)

if (!in)
{
MD5(NULL, 0, md5_result);
oss_file.hash_calc(NULL, 0, md5_result);
return;
}

fseek(in, 0L, SEEK_END);
long filesize = ftell(in);
if (!filesize)
{
MD5(NULL, 0, md5_result);
oss_file.hash_calc(NULL, 0, md5_result);
}
else
{
Expand All @@ -160,7 +160,7 @@ void get_file_md5(char *filepath, uint8_t *md5_result)
fprintf(stderr, "Warning: cannot open file %s\n", filepath);

/* Calculate MD5sum */
MD5(buffer, filesize, md5_result);
oss_file.hash_calc(buffer, filesize, md5_result);
free(buffer);
fclose(in);
}
Expand Down
25 changes: 23 additions & 2 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,17 @@ bool lib_encoder_load()
#endif
}

/**
 * @brief Choose the hash routine to attach to a table, based on its key length.
 * @param key_ln key length taken from the table configuration.
 * @return ldb_crc64 when key_ln equals 8, md5_string for every other value.
 */
static hash_calc_t hash_function_select(int key_ln)
{
	/* md5_string is the fallback for any key length other than 8 */
	hash_calc_t selected = md5_string;

	if (key_ln == 8)
	{
		selected = ldb_crc64;
	}

	return selected;
}

/* Initialize tables for the DB name indicated (defaults to oss) */
void initialize_ldb_tables(char *name)
{

char * ldb_ver = NULL;
ldb_version(&ldb_ver);
scanlog("ldb version: %s\n", ldb_ver);
Expand All @@ -132,51 +139,65 @@ void initialize_ldb_tables(char *name)
scanlog("Loading tables definitions\n");
snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "url");
oss_url = ldb_read_cfg(dbtable);
oss_url.hash_calc = hash_function_select(oss_url.key_ln);

snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "file");
oss_file = ldb_read_cfg(dbtable);
oss_file.hash_calc = hash_function_select(oss_file.key_ln);

ldb_hash_mode_select(oss_file.key_ln);
//ldb_hash_mode_select(oss_file.key_ln);

if (ldb_table_exists(oss_db_name, "path"))
{
path_table_present = true;
snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "path");
oss_path = ldb_read_cfg(dbtable);
oss_path.hash_calc = hash_function_select(oss_path.key_ln);
}

snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "wfp");
oss_wfp = ldb_read_cfg(dbtable);
oss_wfp.hash_calc = hash_function_select(oss_wfp.key_ln);

snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "purl");
oss_purl = ldb_read_cfg(dbtable);
oss_purl.hash_calc = hash_function_select(oss_purl.key_ln);

snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "copyright");
oss_copyright = ldb_read_cfg(dbtable);
oss_copyright.hash_calc = hash_function_select(oss_copyright.key_ln);

snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "quality");
oss_quality = ldb_read_cfg(dbtable);
oss_quality.hash_calc = hash_function_select(oss_quality.key_ln);

snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "vulnerability");
oss_vulnerability = ldb_read_cfg(dbtable);
oss_vulnerability.hash_calc = hash_function_select(oss_vulnerability.key_ln);

snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "dependency");
oss_dependency = ldb_read_cfg(dbtable);
oss_dependency.hash_calc = hash_function_select(oss_dependency.key_ln);

snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "license");
oss_license = ldb_read_cfg(dbtable);
oss_license.hash_calc = hash_function_select(oss_license.key_ln);

snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "attribution");
oss_attribution = ldb_read_cfg(dbtable);
oss_attribution.hash_calc = hash_function_select(oss_attribution.key_ln);

snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "cryptography");
oss_cryptography = ldb_read_cfg(dbtable);
oss_cryptography.hash_calc = hash_function_select(oss_cryptography.key_ln);

snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "sources");
oss_sources = ldb_read_cfg(dbtable);
oss_sources.hash_calc = hash_function_select(oss_sources.key_ln);

snprintf(dbtable, MAX_ARGLN * 2, "%s/%s", oss_db_name, "notices");
oss_notices = ldb_read_cfg(dbtable);
oss_notices.hash_calc = hash_function_select(oss_notices.key_ln);

kb_version_get();
osadl_load_file();
Expand Down
4 changes: 2 additions & 2 deletions src/match.c
Original file line number Diff line number Diff line change
Expand Up @@ -320,14 +320,14 @@ static bool component_hint_date_comparation(component_data_t *a, component_data_
if (!a->purls_md5[0] && a->purls[0])
{
a->purls_md5[0] = malloc(oss_purl.key_ln);
MD5((uint8_t *)a->purls[0], strlen(a->purls[0]), a->purls_md5[0]);
oss_purl.hash_calc((uint8_t *)a->purls[0], strlen(a->purls[0]), a->purls_md5[0]);
a->age = get_component_age(a->purls_md5[0]);
}

if (!b->purls_md5[0] && b->purls[0])
{
b->purls_md5[0] = malloc(oss_purl.key_ln);
MD5((uint8_t *)b->purls[0], strlen(b->purls[0]), b->purls_md5[0]);
oss_purl.hash_calc((uint8_t *)b->purls[0], strlen(b->purls[0]), b->purls_md5[0]);
b->age = get_component_age(b->purls_md5[0]);
}

Expand Down
11 changes: 6 additions & 5 deletions src/mz.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ void mz_get_key(struct ldb_table kb, char *key)
char mz_path[LDB_MAX_PATH + kb.key_ln];
char mz_file_id[5] = "\0\0\0\0\0";
struct mz_job job;
job.key_ln = kb.key_ln -2;
memcpy(mz_file_id, key, 4);
sprintf(mz_path, "%s/%s/%s/%s.mz", ldb_root, kb.db, kb.table,mz_file_id);

Expand Down Expand Up @@ -79,7 +80,7 @@ void mz_get_key(struct ldb_table kb, char *key)
{
/* Position pointers */
job.id = job.mz + ptr;
uint8_t *file_ln = job.id + MZ_MD5;
uint8_t *file_ln = job.id + job.key_ln;
job.zdata = file_ln + MZ_SIZE;

/* Get compressed data size */
Expand All @@ -88,19 +89,19 @@ void mz_get_key(struct ldb_table kb, char *key)
job.zdata_ln = tmpln;

/* Get total mz record length */
job.ln = MZ_MD5 + MZ_SIZE + job.zdata_ln;
job.ln = job.key_ln + MZ_SIZE + job.zdata_ln;

/* Pass job to handler */
if (!memcmp(job.id, job.key + 2, MZ_MD5))
if (!memcmp(job.id, job.key + 2, job.key_ln))
{
if (kb.definitions & LDB_TABLE_DEFINITION_ENCRYPTED)
{
decrypt_mz(job.id, job.zdata_ln);
decrypt_mz(kb.key_ln, job.id, job.zdata_ln);
}
/* Decompress */
MZ_DEFLATE(&job);

job.data[job.data_ln] = 0;
//job.data[job.data_ln] = 0;
printf("%s", job.data);
return;
}
Expand Down
2 changes: 1 addition & 1 deletion src/query.c
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,6 @@ void purl_version_md5(uint8_t *out, char *purl, char *version)
{
char purl_version[MAX_ARGLN] = "\0";
sprintf(purl_version, "%s@%s", purl, version);
MD5((uint8_t *)purl_version, strlen(purl_version), out);
oss_purl.hash_calc((uint8_t *)purl_version, strlen(purl_version), out);
}

2 changes: 1 addition & 1 deletion src/report.c
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ bool print_json_component(component_data_t * component)
if (component->purls[i] && !component->purls_md5[i])
{
component->purls_md5[i] = malloc(oss_purl.key_ln);
MD5((uint8_t *)component->purls[i], strlen(component->purls[i]), component->purls_md5[i]);
oss_purl.hash_calc((uint8_t *)component->purls[i], strlen(component->purls[i]), component->purls_md5[i]);
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ int wfp_scan(char * path, int scan_max_snippets, int scan_max_components)
if (is_bin)
binary_scan(&line[4]);

/* Parse file information with format: file=MD5(32),file_size,file_path */
/* Parse file information with format: file=HASH(16/32),file_size,file_path */
if (is_file)
{
/* A scan data was fullfilled and is ready to be scanned */
Expand Down
28 changes: 20 additions & 8 deletions src/snippets.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ static bool get_all_file_ids(struct ldb_table * table, uint8_t *key, uint8_t *su
{
uint32_t size = uint32_read(record);
/* End recordset fetch if MAX_QUERY_RESPONSE is reached */
if (size + datalen + 4 >= WFP_REC_LN * MATCHMAP_ITEM_SIZE)
if (size + datalen + 4 >= table->rec_ln * MATCHMAP_ITEM_SIZE)
{
return true;
}
Expand Down Expand Up @@ -613,7 +613,7 @@ int add_file_to_matchmap(scan_data_t *scan, matchmap_entry_t *item, uint8_t *md5
return -1;
}

if (md5cmp(scan->matchmap[t].md5, md5))
if (hashcmp(oss_file.key_ln, scan->matchmap[t].md5, md5))
{
lastwfp = scan->matchmap[t].lastwfp;
found = t;
Expand Down Expand Up @@ -744,13 +744,13 @@ match_t ldb_scan_snippets(scan_data_t *scan)
for (long i = 0; i < scan->hash_count; i++)
{
/* Get all file IDs for given wfp */
map[i].md5_set = malloc(WFP_REC_LN * MATCHMAP_ITEM_SIZE);
map[i].md5_set = malloc(oss_wfp.rec_ln * MATCHMAP_ITEM_SIZE);
wfp_invert(scan->hashes[i], map[i].wfp);
//scanlog(" Add wfp %02x%02x%02x%02x to map\n",map[i].wfp[0], map[i].wfp[1],map[i].wfp[2],map[i].wfp[3]);
scanlog(" Add wfp %02x%02x%02x%02x to map\n",map[i].wfp[0], map[i].wfp[1],map[i].wfp[2],map[i].wfp[3]);
uint32_write(map[i].md5_set, 0);
map[i].line = scan->lines[i];
ldb_fetch_recordset(NULL, oss_wfp, map[i].wfp, false, get_all_file_ids, (void *)map[i].md5_set);
map[i].size = uint32_read(map[i].md5_set) / WFP_REC_LN;
map[i].size = uint32_read(map[i].md5_set) / oss_wfp.rec_ln;
//Initialize the lines indirection when a wfp from a line has at least one md5 linked
if (map[i].size)
map_lines_indirection[scan->lines[i]] = 0;
Expand All @@ -759,6 +759,18 @@ match_t ldb_scan_snippets(scan_data_t *scan)
map_max_size = map[i].size;

}

/*for (long i = 0; i < scan->hash_count; i++)
{
printf("%02x%02x%02x%02x: ", map[i].wfp[0], map[i].wfp[1],map[i].wfp[2],map[i].wfp[3]);
for (int j=0; j < map[i].size; j++)
{
char hex[MD5_LEN_HEX] = "\0";
ldb_bin_to_hex(map[i].md5_set + 4 + j * oss_wfp.rec_ln, oss_file.key_ln, hex);
printf(" %s", hex);
}
printf("\n");
}*/
/* Classify the WFPs into categories depending on popularity.
Each category will contain a subset of indexes referring to map rows */
#define MAP_INDIRECTION_CAT_NUMBER 1000
Expand Down Expand Up @@ -888,7 +900,7 @@ match_t ldb_scan_snippets(scan_data_t *scan)
/* Add each item to the matchmap*/
for (int wfp_index = map_indexes[i]; wfp_index < map[i].size; wfp_index++)
{
int wfp_p = wfp_index * WFP_REC_LN;
int wfp_p = wfp_index * oss_wfp.rec_ln;
/*Stop when a new sector appears*/
if (md5s[wfp_p] != sector)
{
Expand Down Expand Up @@ -946,7 +958,7 @@ match_t ldb_scan_snippets(scan_data_t *scan)
/* Add each item to the matchmap*/
for (int wfp_index = map_indexes[i]; wfp_index < map[i].size; wfp_index++)
{
int wfp_p = wfp_index * WFP_REC_LN;
int wfp_p = wfp_index * oss_wfp.rec_ln;
int sector = md5s[wfp_p];
int sector_max = min_match_hits;

Expand All @@ -955,7 +967,7 @@ match_t ldb_scan_snippets(scan_data_t *scan)
else
sector_max = scan->matchmap[scan->matchmap_rank_by_sector[sector]].hits;

if (md5cmp(&md5s[wfp_p], scan->matchmap[scan->matchmap_rank_by_sector[sector]].md5))
if (hashcmp(oss_file.key_ln, &md5s[wfp_p], scan->matchmap[scan->matchmap_rank_by_sector[sector]].md5))
{
add_file_to_matchmap(scan, &map[i], &md5s[wfp_p], 0, &sector_max, &scan->matchmap_rank_by_sector[sector]);
md5_proceced++;
Expand Down
6 changes: 3 additions & 3 deletions src/url.c
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ bool handle_purl_record(struct ldb_table * table, uint8_t *key, uint8_t *subkey,
scanlog("Related PURL: %s\n", purl);
component->purls[i] = purl;
component->purls_md5[i] = malloc(table->key_ln);
MD5((uint8_t *)purl, strlen(purl), component->purls_md5[i]);
oss_purl.hash_calc((uint8_t *)purl, strlen(purl), component->purls_md5[i]);
return false;
}
/* Already exists, exit */
Expand Down Expand Up @@ -247,7 +247,7 @@ void fetch_related_purls(component_data_t *component)
if (!component->purls_md5[0] && component->purls[0])
{
component->purls_md5[0] = malloc(oss_purl.key_ln);
MD5((uint8_t *)component->purls[0], strlen(component->purls[0]), component->purls_md5[0]);
oss_purl.hash_calc((uint8_t *)component->purls[0], strlen(component->purls[0]), component->purls_md5[0]);
}

/* Fill purls */
Expand Down Expand Up @@ -309,7 +309,7 @@ void purl_release_date(char *purl, char *date)
return;

uint8_t purl_md5[oss_purl.key_ln];
MD5((uint8_t *)purl, strlen(purl), purl_md5);
oss_purl.hash_calc((uint8_t *)purl, strlen(purl), purl_md5);

ldb_fetch_recordset(NULL, oss_purl, purl_md5, false, get_purl_first_release, (void *) date);
}
Expand Down
Loading

0 comments on commit 634aaad

Please sign in to comment.