From 687d398fc0468009a58f9ae96c70f13fd0af0dfb Mon Sep 17 00:00:00 2001 From: yaoqiankun <410728991@qq.com> Date: Mon, 30 Sep 2024 11:42:53 +0800 Subject: [PATCH 1/4] Simplify the usage of dict --- graphrag/entity_resolution.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/graphrag/entity_resolution.py b/graphrag/entity_resolution.py index c4c0dc3496..498ce5df06 100644 --- a/graphrag/entity_resolution.py +++ b/graphrag/entity_resolution.py @@ -93,16 +93,13 @@ def __call__(self, graph: nx.Graph, prompt_variables: dict[str, Any] | None = No node_clusters[graph.nodes[node]['entity_type']].append(node) candidate_resolution = {entity_type: [] for entity_type in entity_types} - for node_cluster in node_clusters.items(): + for k, v in node_clusters.items(): candidate_resolution_tmp = [] - for a in node_cluster[1]: - for b in node_cluster[1]: - if a == b: - continue - if self.is_similarity(a, b) and (b, a) not in candidate_resolution_tmp: + for a in v: + for b in v[1:]: + if self.is_similarity(a, b): candidate_resolution_tmp.append((a, b)) - if candidate_resolution_tmp: - candidate_resolution[node_cluster[0]] = candidate_resolution_tmp + candidate_resolution[k] = candidate_resolution_tmp or v gen_conf = {"temperature": 0.5} resolution_result = set() From e22aa1126dd122ad64990378e2504aa085e682b8 Mon Sep 17 00:00:00 2001 From: yaoqiankun <410728991@qq.com> Date: Mon, 30 Sep 2024 16:35:14 +0800 Subject: [PATCH 2/4] fix --- graphrag/entity_resolution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphrag/entity_resolution.py b/graphrag/entity_resolution.py index 498ce5df06..fc552b3d30 100644 --- a/graphrag/entity_resolution.py +++ b/graphrag/entity_resolution.py @@ -97,7 +97,7 @@ def __call__(self, graph: nx.Graph, prompt_variables: dict[str, Any] | None = No candidate_resolution_tmp = [] for a in v: for b in v[1:]: - if self.is_similarity(a, b): + if self.is_similarity(a, b) and (b, a) not in candidate_resolution_tmp: candidate_resolution_tmp.append((a, b)) candidate_resolution[k] = candidate_resolution_tmp or v From abaa8771e4dcdf3244b1dc8a5252b131330e5d5d Mon Sep 17 00:00:00 2001 From: yaoqiankun <410728991@qq.com> Date: Mon, 30 Sep 2024 16:42:44 +0800 Subject: [PATCH 3/4] gix --- graphrag/entity_resolution.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/graphrag/entity_resolution.py b/graphrag/entity_resolution.py index fc552b3d30..d82b4bce99 100644 --- a/graphrag/entity_resolution.py +++ b/graphrag/entity_resolution.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # - +import itertools import logging import re import traceback @@ -95,10 +95,9 @@ def __call__(self, graph: nx.Graph, prompt_variables: dict[str, Any] | None = No candidate_resolution = {entity_type: [] for entity_type in entity_types} for k, v in node_clusters.items(): candidate_resolution_tmp = [] - for a in v: - for b in v[1:]: - if self.is_similarity(a, b) and (b, a) not in candidate_resolution_tmp: - candidate_resolution_tmp.append((a, b)) + for a, b in itertools.permutations(v, 2): + if self.is_similarity(a, b) and (b, a) not in candidate_resolution_tmp: + candidate_resolution_tmp.append((a, b)) candidate_resolution[k] = candidate_resolution_tmp or v gen_conf = {"temperature": 0.5} From e63750ba6907fc6b532fc6638067c22f6b3f97c1 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Mon, 30 Sep 2024 16:54:07 +0800 Subject: [PATCH 4/4] Update graphrag/entity_resolution.py --- graphrag/entity_resolution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphrag/entity_resolution.py b/graphrag/entity_resolution.py index d82b4bce99..b3398015a9 100644 --- a/graphrag/entity_resolution.py +++ b/graphrag/entity_resolution.py @@ -98,7 +98,7 @@ def __call__(self, graph: nx.Graph, prompt_variables: dict[str, Any] | None = No for a, b in itertools.permutations(v, 2): if self.is_similarity(a, b) and (b, a) not in candidate_resolution_tmp: candidate_resolution_tmp.append((a, b)) - candidate_resolution[k] = candidate_resolution_tmp or v + candidate_resolution[k] = candidate_resolution_tmp gen_conf = {"temperature": 0.5} resolution_result = set()