
Commit

fix conflict
wj-Mcat committed Jan 17, 2024
2 parents 4a7e8ab + cf907fc commit 794fae0
Showing 73 changed files with 5,153 additions and 195 deletions.
2 changes: 1 addition & 1 deletion .github/codecov.yml
@@ -10,4 +10,4 @@ coverage:
threshold: 1% # Allow the coverage to drop by 1%, and posting a success status.
patch:
default:
target: 80% # lines adjusted Coverage < 80% CI will fail
target: 80% # lines adjusted Coverage < 80% CI will fail
36 changes: 23 additions & 13 deletions applications/neural_search/recall/in_batch_negative/evaluate.py
@@ -13,18 +13,29 @@
# limitations under the License.

import argparse
import time

import numpy as np

import time

parser = argparse.ArgumentParser()
parser.add_argument("--similar_text_pair", type=str,
default='', help="The full path of similar pair file")
parser.add_argument("--recall_result_file", type=str,
default='', help="The full path of recall result file")
parser.add_argument("--recall_num", type=int, default=10,
help="Most similar number of doc recalled from corpus per query")
parser.add_argument(
"--similar_text_pair",
type=str,
default="",
help="The full path of similar pair file",
)
parser.add_argument(
"--recall_result_file",
type=str,
default="",
help="The full path of recall result file",
)
parser.add_argument(
"--recall_num",
type=int,
default=10,
help="Most similar number of doc recalled from corpus per query",
)


args = parser.parse_args()
@@ -62,17 +73,16 @@ def recall(rs, N=10):
with open(args.recall_result_file, "r", encoding="utf-8") as f:
relevance_labels = []
for index, line in enumerate(f):

if index % args.recall_num == 0 and index != 0:
rs.append(relevance_labels)
relevance_labels = []

text, recalled_text, cosine_sim = line.rstrip().split("\t")
if text2similar[text] == recalled_text:
relevance_labels.append(1)
else:
relevance_labels.append(0)

if (index + 1) % args.recall_num == 0:
rs.append(relevance_labels)
relevance_labels = []

recall_N = []
recall_num = [1, 5, 10, 20, 50]
for topN in recall_num:
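The substantive change in this evaluate.py (it recurs in the simcse and other recall evaluate.py hunks below) is where the accumulated relevance labels are flushed into rs: the old loop appended the group at the start of the next one (index % args.recall_num == 0 and index != 0), which silently drops the final group of recall_num labels, while the new loop appends after each complete group ((index + 1) % args.recall_num == 0), so the last group is counted. A minimal sketch of the difference, with invented labels and recall_num = 2:

# Toy illustration of the grouping change; the labels are made up for the example.
labels = [1, 0, 0, 1, 0, 0]
recall_num = 2

# Old logic: flush at the start of the next group -> the final group is lost.
rs_old, group = [], []
for index, label in enumerate(labels):
    if index % recall_num == 0 and index != 0:
        rs_old.append(group)
        group = []
    group.append(label)
print(rs_old)  # [[1, 0], [0, 1]] -- the trailing [0, 0] group is dropped

# New logic: flush once each group is complete -> all groups are kept.
rs_new, group = [], []
for index, label in enumerate(labels):
    group.append(label)
    if (index + 1) % recall_num == 0:
        rs_new.append(group)
        group = []
print(rs_new)  # [[1, 0], [0, 1], [0, 0]]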
9 changes: 4 additions & 5 deletions applications/neural_search/recall/simcse/evaluate.py
@@ -57,17 +57,16 @@ def recall(rs, N=10):
with open(args.recall_result_file, "r", encoding="utf-8") as f:
relevance_labels = []
for index, line in enumerate(f):

if index % args.recall_num == 0 and index != 0:
rs.append(relevance_labels)
relevance_labels = []

text, recalled_text, cosine_sim = line.rstrip().split("\t")
if text2similar[text] == recalled_text:
relevance_labels.append(1)
else:
relevance_labels.append(0)

if (index + 1) % args.recall_num == 0:
rs.append(relevance_labels)
relevance_labels = []

recall_N = []
recall_num = [1, 5, 10, 20, 50]
result = open("result.tsv", "a")
@@ -59,16 +59,16 @@ def recall(rs, N=10):
with open(args.recall_result_file, "r", encoding="utf-8") as f:
relevance_labels = []
for index, line in enumerate(f):

if index % args.recall_num == 0 and index != 0:
rs.append(relevance_labels)
relevance_labels = []

text, recalled_text, cosine_sim = line.rstrip().split("\t")
if text2similar[text] == recalled_text:
relevance_labels.append(1)
else:
relevance_labels.append(0)

if (index + 1) % args.recall_num == 0:
rs.append(relevance_labels)
relevance_labels = []

recall_N = []
recall_num = [1, 5, 10]
result = open("result.tsv", "a")
@@ -59,16 +59,16 @@ def recall(rs, N=10):
with open(args.recall_result_file, "r", encoding="utf-8") as f:
relevance_labels = []
for index, line in enumerate(f):

if index % args.recall_num == 0 and index != 0:
rs.append(relevance_labels)
relevance_labels = []

text, recalled_text, cosine_sim = line.rstrip().split("\t")
if text2similar[text] == recalled_text:
relevance_labels.append(1)
else:
relevance_labels.append(0)

if (index + 1) % args.recall_num == 0:
rs.append(relevance_labels)
relevance_labels = []

recall_N = []
recall_num = [1, 5, 10]
result = open("result.tsv", "a")
@@ -18,10 +18,23 @@
import numpy as np

parser = argparse.ArgumentParser()
parser.add_argument("--similar_text_pair", type=str, default="", help="The full path of similar pair file")
parser.add_argument("--recall_result_file", type=str, default="", help="The full path of recall result file")
parser.add_argument(
"--recall_num", type=int, default=10, help="Most similair number of doc recalled from corpus per query"
"--similar_text_pair",
type=str,
default="",
help="The full path of similar pair file",
)
parser.add_argument(
"--recall_result_file",
type=str,
default="",
help="The full path of recall result file",
)
parser.add_argument(
"--recall_num",
type=int,
default=10,
help="Most similair number of doc recalled from corpus per query",
)
args = parser.parse_args()

@@ -57,17 +70,24 @@ def recall(rs, N=10):
with open(args.recall_result_file, "r", encoding="utf-8") as f:
relevance_labels = []
for index, line in enumerate(f):

if index % args.recall_num == 0 and index != 0:
rs.append(relevance_labels)
relevance_labels = []
text_arr = line.rstrip().split("\t")
text_title, text_para, recalled_title, recalled_para, label, cosine_sim = text_arr
(
text_title,
text_para,
recalled_title,
recalled_para,
label,
cosine_sim,
) = text_arr
if text2similar["\t".join([text_title, text_para])] == label:
relevance_labels.append(1)
else:
relevance_labels.append(0)

if (index + 1) % args.recall_num == 0:
rs.append(relevance_labels)
relevance_labels = []

recall_N = []
recall_num = [1, 5, 10, 20, 50]
for topN in recall_num:
@@ -18,10 +18,23 @@
import numpy as np

parser = argparse.ArgumentParser()
parser.add_argument("--similar_text_pair", type=str, default="", help="The full path of similar pair file")
parser.add_argument("--recall_result_file", type=str, default="", help="The full path of recall result file")
parser.add_argument(
"--recall_num", type=int, default=10, help="Most similar number of doc recalled from corpus per query"
"--similar_text_pair",
type=str,
default="",
help="The full path of similar pair file",
)
parser.add_argument(
"--recall_result_file",
type=str,
default="",
help="The full path of recall result file",
)
parser.add_argument(
"--recall_num",
type=int,
default=10,
help="Most similar number of doc recalled from corpus per query",
)
args = parser.parse_args()

@@ -57,17 +70,24 @@ def recall(rs, N=10):
with open(args.recall_result_file, "r", encoding="utf-8") as f:
relevance_labels = []
for index, line in enumerate(f):

if index % args.recall_num == 0 and index != 0:
rs.append(relevance_labels)
relevance_labels = []
text_arr = line.rstrip().split("\t")
text_title, text_para, recalled_title, recalled_para, label, cosine_sim = text_arr
(
text_title,
text_para,
recalled_title,
recalled_para,
label,
cosine_sim,
) = text_arr
if text2similar["\t".join([text_title, text_para])] == label:
relevance_labels.append(1)
else:
relevance_labels.append(0)

if (index + 1) % args.recall_num == 0:
rs.append(relevance_labels)
relevance_labels = []

recall_N = []
recall_num = [1, 5, 10, 20, 50]
for topN in recall_num:
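Both semantic-indexing variants above read a six-column recall file (query title, query paragraph, recalled title, recalled paragraph, label, cosine similarity) and count a hit when the label matches text2similar keyed on the tab-joined query title and paragraph. The body of recall(rs, N=10) is collapsed in these hunks; a sketch consistent with how rs is built (one 0/1 relevance list per query) might look like the following, which is an assumption rather than the file's actual implementation:

# Hypothetical Recall@N over grouped 0/1 relevance labels; the real recall()
# body is collapsed in this diff, so treat this as an illustrative assumption.
import numpy as np

def recall_at_n(rs, N=10):
    # Fraction of queries whose top-N recalled docs contain the ground truth.
    return np.mean([1.0 if np.sum(r[:N]) > 0 else 0.0 for r in rs])

rs = [[0, 1, 0], [0, 0, 0], [1, 0, 0]]
print(recall_at_n(rs, N=2))  # 0.666... -> two of the three queries hit within the top 2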
69 changes: 69 additions & 0 deletions csrc/generation/get_output.cc
@@ -0,0 +1,69 @@
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/types.h>
#include "paddle/extension.h"

#define MAX_BSZ 512

struct msgdata {
long mtype;
int mtext[MAX_BSZ + 2]; // stop_flag, bsz, tokens
};

void GetOutput(const paddle::Tensor& x,
int64_t rank_id,
bool wait_flag) {
if (rank_id > 0) return;

static struct msgdata msg_rcv;

static key_t key = ftok("./", 1);

static int msgid = msgget(key, IPC_CREAT | 0666);

int64_t *out_data = const_cast<int64_t*>(x.data<int64_t>());
int ret = -1;
if (!wait_flag) {
ret = msgrcv(msgid, &msg_rcv, (MAX_BSZ + 2) * 4, 0, IPC_NOWAIT);
} else {
ret = msgrcv(msgid, &msg_rcv, (MAX_BSZ + 2) * 4, 0, 0);
}
  if (ret == -1) {
// read none
out_data[0] = -2;
out_data[1] = 0;
return;
}

int bsz = msg_rcv.mtext[1];

for (int64_t i = 0; i < bsz + 2; i++) {
out_data[i] = (int64_t)msg_rcv.mtext[i];
}
return;
}

PD_BUILD_OP(get_output)
.Inputs({"x"})
.Attrs({"rank_id: int64_t",
"wait_flag: bool"})
.Outputs({"x_out"})
.SetInplaceMap({{"x", "x_out"}})
.SetKernelFn(PD_KERNEL(GetOutput));
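The new csrc/generation/get_output.cc registers a custom Paddle operator that reads one message from a System V message queue (ftok/msgget/msgrcv) and writes it into the input tensor in place: out_data[0] carries the message's stop flag, out_data[1] the batch size bsz, and the next bsz entries the generated token ids; when no message is available (msgrcv returns -1), the op writes -2 and 0 into the first two slots instead. A minimal sketch of the Python consumer side, assuming the op is exposed by the compiled extension as paddlenlp_ops.get_output(x, rank_id, wait_flag); the module name and call style are assumptions, not shown in this diff:

import paddle
from paddlenlp_ops import get_output  # hypothetical import path for the built extension

MAX_BSZ = 512  # must match the MAX_BSZ compiled into get_output.cc

# Layout written by the op: [stop_flag, bsz, token_0, ..., token_{bsz-1}]
out = paddle.full([MAX_BSZ + 2], fill_value=2, dtype="int64")
get_output(out, 0, True)  # rank 0, block until a message arrives

if int(out[0]) == -2:  # the op writes -2 when nothing could be read
    print("queue empty")
else:
    bsz = int(out[1])
    tokens = out[2 : 2 + bsz].numpy().tolist()
    print(f"received {bsz} token ids: {tokens}")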