From 4d1aa39522b96a6dfe5b59d597e8b376f40dc7e6 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Wed, 13 Sep 2023 19:51:27 +0200 Subject: [PATCH 1/6] Python performance improvements with ruff C4 and PERF --- e2e/modeling/test_dpr.py | 10 +++++----- examples/basic_qa_pipeline.py | 15 ++++++++------- haystack/document_stores/base.py | 2 +- haystack/document_stores/opensearch.py | 6 +++--- haystack/modeling/data_handler/dataset.py | 2 +- haystack/modeling/data_handler/processor.py | 14 +++++++------- haystack/modeling/model/biadaptive_model.py | 2 +- haystack/modeling/model/language_model.py | 8 ++++---- haystack/modeling/model/prediction_head.py | 2 +- haystack/modeling/model/triadaptive_model.py | 2 +- haystack/nodes/base.py | 4 ++-- .../nodes/document_classifier/transformers.py | 2 +- haystack/nodes/file_converter/azure.py | 2 +- haystack/nodes/file_converter/base.py | 5 ++--- haystack/nodes/file_converter/parsr.py | 2 +- haystack/nodes/file_converter/pdf.py | 2 +- haystack/nodes/query_classifier/transformers.py | 2 +- haystack/nodes/ranker/base.py | 2 +- haystack/nodes/reader/farm.py | 4 ++-- haystack/nodes/retriever/_embedding_encoder.py | 2 +- haystack/nodes/retriever/dense.py | 6 +++--- haystack/nodes/translator/transformers.py | 2 +- haystack/pipelines/base.py | 8 ++++---- haystack/pipelines/config.py | 2 +- haystack/preview/testing/document_store.py | 7 +++---- haystack/schema.py | 2 +- haystack/testing/document_store.py | 4 ++-- haystack/utils/context_matching.py | 6 +++--- haystack/utils/deepsetcloud.py | 6 +++--- haystack/utils/preprocessing.py | 4 ++-- pyproject.toml | 5 ++++- test/benchmarks/datadog/metric_handler.py | 2 +- test/benchmarks/datadog/send_metrics.py | 2 +- test/benchmarks/utils.py | 4 ++-- test/document_stores/test_elasticsearch.py | 4 ++-- test/nodes/test_file_converter.py | 2 +- test/nodes/test_preprocessor.py | 2 +- test/nodes/test_reader.py | 2 +- test/nodes/test_shaper.py | 4 ++-- test/nodes/test_web_search.py | 2 +- test/others/test_utils.py | 2 +- test/pipelines/test_eval.py | 12 ++++++------ test/prompt/test_prompt_template.py | 8 ++++---- 43 files changed, 95 insertions(+), 93 deletions(-) diff --git a/e2e/modeling/test_dpr.py b/e2e/modeling/test_dpr.py index 1cee3ac87b..57016f2fe7 100644 --- a/e2e/modeling/test_dpr.py +++ b/e2e/modeling/test_dpr.py @@ -777,7 +777,7 @@ def test_dpr_processor_save_load_non_bert_tokenizer(tmp_path: Path, query_and_pa # generate embeddings with model loaded from model hub dataset, tensor_names, _, __ = processor.dataset_from_dicts( - dicts=[d], indices=[i for i in range(len([d]))], return_baskets=True + dicts=[d], indices=list(range(len([d]))), return_baskets=True ) data_loader = NamedDataLoader( @@ -811,7 +811,7 @@ def test_dpr_processor_save_load_non_bert_tokenizer(tmp_path: Path, query_and_pa # generate embeddings with model loaded from disk dataset2, tensor_names2, _, __ = loaded_processor.dataset_from_dicts( - dicts=[d], indices=[i for i in range(len([d]))], return_baskets=True + dicts=[d], indices=list(range(len([d]))), return_baskets=True ) data_loader = NamedDataLoader( @@ -820,7 +820,7 @@ def test_dpr_processor_save_load_non_bert_tokenizer(tmp_path: Path, query_and_pa all_embeddings2: Dict[str, Any] = {"query": [], "passages": []} loaded_model.eval() - for i, batch in enumerate(tqdm(data_loader, desc="Creating Embeddings", unit=" Batches", disable=True)): + for batch in tqdm(data_loader, desc="Creating Embeddings", unit=" Batches", disable=True): batch = {key: batch[key].to(device) for key in batch} # get logits 
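The two hunks above, and many below, are instances of ruff's flake8-comprehensions (C4) family: a comprehension that merely re-emits each element of an iterable is a plain copy, and the direct constructor call does the same work in one C-level pass instead of a per-element bytecode loop. A minimal, self-contained sketch of the three rewrite shapes this series applies most often — the data and timing counts are illustrative only, and absolute numbers will vary by machine and interpreter:

    import timeit

    n = 10_000
    pairs = list(zip(range(n), range(n)))

    # C416: a comprehension that only re-emits each element is a copy;
    # the constructor form does the same in a single C-level call.
    slow_list = timeit.timeit(lambda: [i for i in range(n)], number=200)
    fast_list = timeit.timeit(lambda: list(range(n)), number=200)

    # C404/C416: dict(pairs) instead of {k: v for k, v in pairs}.
    slow_dict = timeit.timeit(lambda: {k: v for k, v in pairs}, number=200)
    fast_dict = timeit.timeit(lambda: dict(pairs), number=200)

    # C401: a set comprehension instead of set(generator expression).
    slow_set = timeit.timeit(lambda: set(i % 97 for i in range(n)), number=200)
    fast_set = timeit.timeit(lambda: {i % 97 for i in range(n)}, number=200)

    print(f"list copy: {slow_list:.3f}s -> {fast_list:.3f}s")
    print(f"dict copy: {slow_dict:.3f}s -> {fast_dict:.3f}s")
    print(f"set build: {slow_set:.3f}s -> {fast_set:.3f}s")

On CPython the constructor forms are typically noticeably faster for plain copies, but the larger win is readability: list(range(...)) states the intent directly.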
@@ -904,7 +904,7 @@ def test_dpr_processor_save_load_non_bert_tokenizer(tmp_path: Path, query_and_pa # generate embeddings with model loaded from disk that originated from a FARM style model that was saved to disk earlier dataset3, tensor_names3, _, __ = loaded_processor.dataset_from_dicts( - dicts=[d], indices=[i for i in range(len([d]))], return_baskets=True + dicts=[d], indices=list(range(len([d]))), return_baskets=True ) data_loader = NamedDataLoader( @@ -913,7 +913,7 @@ def test_dpr_processor_save_load_non_bert_tokenizer(tmp_path: Path, query_and_pa all_embeddings3: Dict[str, Any] = {"query": [], "passages": []} loaded_model.eval() - for i, batch in enumerate(tqdm(data_loader, desc="Creating Embeddings", unit=" Batches", disable=True)): + for batch in tqdm(data_loader, desc="Creating Embeddings", unit=" Batches", disable=True): batch = {key: batch[key].to(device) for key in batch} # get logits diff --git a/examples/basic_qa_pipeline.py b/examples/basic_qa_pipeline.py index 9e90114d0e..b58e0ccfe9 100644 --- a/examples/basic_qa_pipeline.py +++ b/examples/basic_qa_pipeline.py @@ -1,16 +1,17 @@ import logging from pathlib import Path -logging.basicConfig(format="%(levelname)s - %(name)s - %(message)s", level=logging.WARNING) -logging.getLogger("haystack").setLevel(logging.INFO) - from haystack.document_stores import ElasticsearchDocumentStore -from haystack.utils import fetch_archive_from_http, print_answers, launch_es -from haystack.nodes import FARMReader, BM25Retriever +from haystack.nodes import BM25Retriever, FARMReader from haystack.nodes.file_classifier import FileTypeClassifier -from haystack.nodes.preprocessor import PreProcessor from haystack.nodes.file_converter import TextConverter +from haystack.nodes.preprocessor import PreProcessor from haystack.pipelines import Pipeline +from haystack.utils import fetch_archive_from_http, launch_es, print_answers + +# pylint: disable=no-logging-basicconfig +logging.basicConfig(format="%(levelname)s - %(name)s - %(message)s", level=logging.WARNING) +logging.getLogger("haystack").setLevel(logging.INFO) def basic_qa_pipeline(): @@ -22,7 +23,7 @@ def basic_qa_pipeline(): s3_url = "https://core-engineering.s3.eu-central-1.amazonaws.com/public/scripts/wiki_gameofthrones_txt1.zip" fetch_archive_from_http(url=s3_url, output_dir=doc_dir) - file_paths = [p for p in Path(doc_dir).glob("**/*")] + file_paths = list(Path(doc_dir).glob("**/*")) files_metadata = [{"name": path.name} for path in file_paths] # Indexing Pipeline diff --git a/haystack/document_stores/base.py b/haystack/document_stores/base.py index a943dc35b1..514b6eae0d 100644 --- a/haystack/document_stores/base.py +++ b/haystack/document_stores/base.py @@ -595,7 +595,7 @@ def _drop_duplicate_documents(self, documents: List[Document], index: Optional[s :param index: name of the index :return: A list of Haystack Document objects. 
""" - _hash_ids: Set = set([]) + _hash_ids: Set = set() _documents: List[Document] = [] for document in documents: diff --git a/haystack/document_stores/opensearch.py b/haystack/document_stores/opensearch.py index 0b05bbb8be..4b41a9d741 100644 --- a/haystack/document_stores/opensearch.py +++ b/haystack/document_stores/opensearch.py @@ -1215,11 +1215,11 @@ def _get_embedding_field_mapping( def _ivf_model_exists(self, index: str) -> bool: if self._index_exists(".opensearch-knn-models"): response = self.client.transport.perform_request("GET", "/_plugins/_knn/models/_search") - existing_ivf_models = set( + existing_ivf_models = { model["_source"]["model_id"] for model in response["hits"]["hits"] if model["_source"]["state"] != "failed" - ) + } else: existing_ivf_models = set() @@ -1461,7 +1461,7 @@ def _delete_ivf_model(self, index: str): """ if self._index_exists(".opensearch-knn-models"): response = self.client.transport.perform_request("GET", "/_plugins/_knn/models/_search") - existing_ivf_models = set(model["_source"]["model_id"] for model in response["hits"]["hits"]) + existing_ivf_models = {model["_source"]["model_id"] for model in response["hits"]["hits"]} if f"{index}-ivf" in existing_ivf_models: self.client.transport.perform_request("DELETE", f"/_plugins/_knn/models/{index}-ivf") diff --git a/haystack/modeling/data_handler/dataset.py b/haystack/modeling/data_handler/dataset.py index 6c073a96b7..3a8df06283 100644 --- a/haystack/modeling/data_handler/dataset.py +++ b/haystack/modeling/data_handler/dataset.py @@ -27,7 +27,7 @@ def flatten_rename( assert any(key in encoded_batch for key in keys), f"one of the keys {keys} is not in batch {encoded_batch.keys()}" features_flat = [] for item in range(len(encoded_batch[keys[0]])): - feat_dict = {k: v for k, v in zip(renamed_keys, [encoded_batch[k][item] for k in keys])} + feat_dict = dict(zip(renamed_keys, [encoded_batch[k][item] for k in keys])) features_flat.append(feat_dict) return features_flat diff --git a/haystack/modeling/data_handler/processor.py b/haystack/modeling/data_handler/processor.py index 6d5cda47fb..3556419496 100644 --- a/haystack/modeling/data_handler/processor.py +++ b/haystack/modeling/data_handler/processor.py @@ -174,7 +174,7 @@ def load_from_dir(cls, load_dir: str): config = json.load(f) config["inference"] = True # init tokenizer - if "lower_case" in config.keys(): + if "lower_case" in config: logger.warning( "Loading tokenizer from deprecated config. " "If you used `custom_vocab` or `never_split_chars`, this won't work anymore." @@ -1249,7 +1249,7 @@ def _combine_title_context(titles: List[str], texts: List[str]): "Couldn't find title although `embed_title` is set to True for DPR. Using title='' now. 
Related passage text: '%s' ", ctx, ) - res.append(tuple((title, ctx))) + res.append((title, ctx)) return res @@ -1762,7 +1762,7 @@ def _combine_meta_context(meta_fields: List[str], texts: List[str]): for meta, ctx in zip(meta_fields, texts): if meta is None: meta = "" - res.append(tuple((meta, ctx))) + res.append((meta, ctx)) return res @@ -2111,12 +2111,12 @@ def dataset_from_dicts( truncation=True, max_length=self.max_seq_len, ) - names = [key for key in tokens] + names = list(tokens) inputs = [tokens[key] for key in tokens] - if not "padding_mask" in names: + if "padding_mask" not in names: index = names.index("attention_mask") names[index] = "padding_mask" - if not "segment_ids" in names: + if "segment_ids" not in names: index = names.index("token_type_ids") names[index] = "segment_ids" @@ -2149,7 +2149,7 @@ def write_squad_predictions(predictions, out_filename, predictions_filename=None dev_labels[q["id"]] = "is_impossible" else: dev_labels[q["id"]] = q["answers"][0]["text"] - not_included = set(list(dev_labels.keys())) - set(list(predictions_json.keys())) + not_included = dev_labels.keys() - predictions_json.keys() if len(not_included) > 0: logger.info("There were missing predictions for question ids: %s", list(not_included)) for x in not_included: diff --git a/haystack/modeling/model/biadaptive_model.py b/haystack/modeling/model/biadaptive_model.py index 0ebcf072e0..3e717484d0 100644 --- a/haystack/modeling/model/biadaptive_model.py +++ b/haystack/modeling/model/biadaptive_model.py @@ -322,7 +322,7 @@ def forward( output2 = None embedding1, embedding2 = head(output1, output2) - all_logits.append(tuple([embedding1, embedding2])) + all_logits.append((embedding1, embedding2)) else: # just return LM output (e.g. useful for extracting embeddings at inference time) all_logits.append((pooled_output)) diff --git a/haystack/modeling/model/language_model.py b/haystack/modeling/model/language_model.py index eaa83febd7..63582419b5 100644 --- a/haystack/modeling/model/language_model.py +++ b/haystack/modeling/model/language_model.py @@ -108,14 +108,14 @@ def output_dims(self): if self._output_dims: return self._output_dims - for odn in OUTPUT_DIM_NAMES: - try: + try: + for odn in OUTPUT_DIM_NAMES: value = getattr(self.model.config, odn, None) if value: self._output_dims = value return value - except AttributeError: - raise ModelingError("Can't get the output dimension before loading the model.") + except AttributeError: + raise ModelingError("Can't get the output dimension before loading the model.") raise ModelingError("Could not infer the output dimensions of the language model.") diff --git a/haystack/modeling/model/prediction_head.py b/haystack/modeling/model/prediction_head.py index 351e4045d0..aefb3eade2 100644 --- a/haystack/modeling/model/prediction_head.py +++ b/haystack/modeling/model/prediction_head.py @@ -732,7 +732,7 @@ def aggregate_preds(self, preds, passage_start_t, ids, seq_2_start_t=None, label all_basket_labels = {k: self.reduce_labels(v) for k, v in all_basket_labels.items()} # Return aggregated predictions in order as a list of lists - keys = [k for k in all_basket_preds] + keys = list(all_basket_preds) aggregated_preds = [all_basket_preds[k] for k in keys] if labels: labels = [all_basket_labels[k] for k in keys] diff --git a/haystack/modeling/model/triadaptive_model.py b/haystack/modeling/model/triadaptive_model.py index 4e92b8ed90..5d831eee1a 100644 --- a/haystack/modeling/model/triadaptive_model.py +++ b/haystack/modeling/model/triadaptive_model.py @@ -283,7 +283,7 @@ def 
forward(self, **kwargs): output2 = None embedding1, embedding2 = head(output1, output2) - all_logits.append(tuple([embedding1, embedding2])) + all_logits.append((embedding1, embedding2)) else: # just return LM output (e.g. useful for extracting embeddings at inference time) all_logits.append((pooled_output)) diff --git a/haystack/nodes/base.py b/haystack/nodes/base.py index 7ba07174dc..a471b8c3f5 100644 --- a/haystack/nodes/base.py +++ b/haystack/nodes/base.py @@ -96,7 +96,7 @@ def name(self, value: str): @property def utilized_components(self) -> List[BaseComponent]: if "params" not in self._component_config: - return list() + return [] return [param for param in self._component_config["params"].values() if isinstance(param, BaseComponent)] @property @@ -229,7 +229,7 @@ def _dispatch_run_general(self, run_method: Callable, **kwargs): if "debug" in value.keys(): self.debug = value.pop("debug") - for _k, _v in value.items(): + for _k in value.keys(): if _k not in run_signature_args: raise Exception(f"Invalid parameter '{_k}' for the node '{self.name}'.") diff --git a/haystack/nodes/document_classifier/transformers.py b/haystack/nodes/document_classifier/transformers.py index 02ba6356fc..f5c2a84f6b 100644 --- a/haystack/nodes/document_classifier/transformers.py +++ b/haystack/nodes/document_classifier/transformers.py @@ -202,7 +202,7 @@ def predict(self, documents: List[Document], batch_size: Optional[int] = None) - formatted_prediction = { "label": prediction["labels"][0], "score": prediction["scores"][0], - "details": {label: score for label, score in zip(prediction["labels"], prediction["scores"])}, + "details": dict(zip(prediction["labels"], prediction["scores"])), } elif self.task == "text-classification": formatted_prediction = { diff --git a/haystack/nodes/file_converter/azure.py b/haystack/nodes/file_converter/azure.py index 8ddbcd0495..fca5811346 100644 --- a/haystack/nodes/file_converter/azure.py +++ b/haystack/nodes/file_converter/azure.py @@ -203,7 +203,7 @@ def _convert_tables_and_text( if not isinstance(table.content, pd.DataFrame): raise HaystackError("Document's content field must be of type 'pd.DataFrame'.") for _, row in table.content.iterrows(): - for _, cell in row.items(): + for cell in row.values(): file_text += f" {cell}" if not self.validate_language(file_text, valid_languages): logger.warning( diff --git a/haystack/nodes/file_converter/base.py b/haystack/nodes/file_converter/base.py index 7bd24e04aa..d5fbb3fc58 100644 --- a/haystack/nodes/file_converter/base.py +++ b/haystack/nodes/file_converter/base.py @@ -202,15 +202,14 @@ def run( # type: ignore for file_path, file_meta in tqdm( zip(file_paths, meta), total=len(file_paths), disable=not self.progress_bar, desc="Converting files" ): - for doc in self.convert( + documents += self.convert( file_path=file_path, meta=file_meta, remove_numeric_tables=remove_numeric_tables, valid_languages=valid_languages, encoding=encoding, id_hash_keys=id_hash_keys, - ): - documents.append(doc) + ) # Cleanup ligatures for document in documents: diff --git a/haystack/nodes/file_converter/parsr.py b/haystack/nodes/file_converter/parsr.py index dceca036f7..55217c2eb7 100644 --- a/haystack/nodes/file_converter/parsr.py +++ b/haystack/nodes/file_converter/parsr.py @@ -199,7 +199,7 @@ def convert( if not isinstance(table.content, pd.DataFrame): raise HaystackError("Document's content field must be of type 'pd.DataFrame'.") for _, row in table.content.iterrows(): - for _, cell in row.items(): + for cell in row.values(): file_text += f" 
{cell}" if not self.validate_language(file_text, valid_languages): logger.warning( diff --git a/haystack/nodes/file_converter/pdf.py b/haystack/nodes/file_converter/pdf.py index 5915541315..7ce6e6ccdd 100644 --- a/haystack/nodes/file_converter/pdf.py +++ b/haystack/nodes/file_converter/pdf.py @@ -286,7 +286,7 @@ def _read_pdf( document += page.get_text("text", textpage=partial_tp, sort=sort_by_position) + "\f" else: cpu = cpu_count() if isinstance(multiprocessing, bool) else multiprocessing - page_list = [i for i in range(start_page, end_page)] + page_list = list(range(start_page, end_page)) cpu = cpu if len(page_list) > cpu else len(page_list) parts = divide(cpu, page_list) pages_mp = [(i, file_path, parts, sort_by_position, ocr, ocr_language) for i in range(cpu)] diff --git a/haystack/nodes/query_classifier/transformers.py b/haystack/nodes/query_classifier/transformers.py index 7fb926e041..1369589608 100644 --- a/haystack/nodes/query_classifier/transformers.py +++ b/haystack/nodes/query_classifier/transformers.py @@ -126,7 +126,7 @@ def __init__( self.labels = labels if task == "text-classification": - labels_from_model = [label for label in self.model.model.config.id2label.values()] + labels_from_model = list(self.model.model.config.id2label.values()) if set(labels) != set(labels_from_model): raise ValueError( f"For text-classification, the provided labels must match the model labels; only the order can differ.\n" diff --git a/haystack/nodes/ranker/base.py b/haystack/nodes/ranker/base.py index 034936e40b..186cc88470 100644 --- a/haystack/nodes/ranker/base.py +++ b/haystack/nodes/ranker/base.py @@ -55,7 +55,7 @@ def _add_meta_fields_to_docs( for key in embed_meta_fields: if key in doc.meta and doc.meta[key]: if isinstance(doc.meta[key], list): - meta_data_fields.extend([item for item in doc.meta[key]]) + meta_data_fields.extend(list(doc.meta[key])) else: meta_data_fields.append(doc.meta[key]) # Convert to type string (e.g. for ints or floats) diff --git a/haystack/nodes/reader/farm.py b/haystack/nodes/reader/farm.py index 5302051928..4e39d3d65e 100644 --- a/haystack/nodes/reader/farm.py +++ b/haystack/nodes/reader/farm.py @@ -1153,10 +1153,10 @@ def eval( } # Get rid of the question key again (after we aggregated we don't need it anymore) - d[str(doc_id)]["qas"] = [v for v in aggregated_per_question.values()] + d[str(doc_id)]["qas"] = list(aggregated_per_question.values()) # Convert input format for FARM - farm_input = [v for v in d.values()] + farm_input = list(d.values()) n_queries = len([y for x in farm_input for y in x["qas"]]) # Create DataLoader that can be passed to the Evaluator diff --git a/haystack/nodes/retriever/_embedding_encoder.py b/haystack/nodes/retriever/_embedding_encoder.py index cab092df3c..571a47499b 100644 --- a/haystack/nodes/retriever/_embedding_encoder.py +++ b/haystack/nodes/retriever/_embedding_encoder.py @@ -394,7 +394,7 @@ def embed(self, model: str, text: List[str]) -> np.ndarray: raise CohereUnauthorizedError(f"Invalid Cohere API key. 
{response.text}") if response.status_code != 200: raise CohereError(response.text, status_code=response.status_code) - generated_embeddings = [e for e in res["embeddings"]] + generated_embeddings = list(res["embeddings"]) return np.array(generated_embeddings) def embed_batch(self, text: List[str]) -> np.ndarray: diff --git a/haystack/nodes/retriever/dense.py b/haystack/nodes/retriever/dense.py index eea47327e3..eddff140ce 100644 --- a/haystack/nodes/retriever/dense.py +++ b/haystack/nodes/retriever/dense.py @@ -484,7 +484,7 @@ def _get_predictions(self, dicts: List[Dict[str, Any]]) -> Dict[str, np.ndarray] :return: dictionary of embeddings for "passages" and "query" """ dataset, tensor_names, _, _ = self.processor.dataset_from_dicts( - dicts, indices=[i for i in range(len(dicts))], return_baskets=True + dicts, indices=list(range(len(dicts))), return_baskets=True ) data_loader = NamedDataLoader( @@ -1113,7 +1113,7 @@ def _get_predictions(self, dicts: List[Dict[str, Any]]) -> Dict[str, np.ndarray] """ dataset, tensor_names, _, _ = self.processor.dataset_from_dicts( - dicts, indices=[i for i in range(len(dicts))], return_baskets=True + dicts, indices=list(range(len(dicts))), return_baskets=True ) data_loader = NamedDataLoader( @@ -1862,7 +1862,7 @@ def _preprocess_documents(self, docs: List[Document]) -> List[Document]: for key in self.embed_meta_fields: if key in doc.meta and doc.meta[key]: if isinstance(doc.meta[key], list): - meta_data_fields.extend([item for item in doc.meta[key]]) + meta_data_fields.extend(list(doc.meta[key])) else: meta_data_fields.append(doc.meta[key]) # Convert to type string (e.g. for ints or floats) diff --git a/haystack/nodes/translator/transformers.py b/haystack/nodes/translator/transformers.py index 8cc3384373..69e98ee631 100644 --- a/haystack/nodes/translator/transformers.py +++ b/haystack/nodes/translator/transformers.py @@ -168,7 +168,7 @@ def translate( return translated_texts[0] elif documents: if isinstance(documents, list) and isinstance(documents[0], str): - return [translated_text for translated_text in translated_texts] + return list(translated_texts) translated_documents: Union[ List[Document], List[Answer], List[str], List[Dict[str, Any]] diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index ea81f3e554..5d51528c8c 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -284,7 +284,7 @@ def save_to_deepset_cloud( index_config = index_pipeline.get_config() pipelines = query_config["pipelines"] + index_config["pipelines"] all_components = query_config["components"] + index_config["components"] - distinct_components = [c for c in {component["name"]: component for component in all_components}.values()] + distinct_components = list({component["name"]: component for component in all_components}.values()) document_stores = [c for c in distinct_components if c["type"].endswith("DocumentStore")] for document_store in document_stores: if document_store["type"] != "DeepsetCloudDocumentStore": @@ -827,10 +827,10 @@ def eval_beir( logger.info("Cropping dataset from %s to %s documents", len(corpus), num_documents) corpus = dict(itertools.islice(corpus.items(), num_documents)) # Remove queries that don't contain the remaining documents - corpus_ids = set(list(corpus.keys())) + corpus_ids = set(corpus.keys()) qrels_new = {} for query_id, document_rel_dict in qrels.items(): - document_rel_ids_intersection = list(corpus_ids & set(list(document_rel_dict.keys()))) + document_rel_ids_intersection = list(corpus_ids & 
set(document_rel_dict.keys())) # If there are no remaining documents related to the query, delete the query if len(document_rel_ids_intersection) == 0: del queries[query_id] @@ -1957,7 +1957,7 @@ def get_document_store(self) -> Optional[BaseDocumentStore]: matches = self.get_nodes_by_class(class_type=BaseDocumentStore) if len(matches) == 0: matches = list( - set(retriever.document_store for retriever in self.get_nodes_by_class(class_type=BaseRetriever)) + {retriever.document_store for retriever in self.get_nodes_by_class(class_type=BaseRetriever)} ) if len(matches) > 1: diff --git a/haystack/pipelines/config.py b/haystack/pipelines/config.py index 114330584d..10fef0f255 100644 --- a/haystack/pipelines/config.py +++ b/haystack/pipelines/config.py @@ -119,7 +119,7 @@ def build_component_dependency_graph( graph = nx.DiGraph() for component_name, component_definition in component_definitions.items(): params = component_definition.get("params", {}) - referenced_components: List[str] = list() + referenced_components: List[str] = [] for param_value in params.values(): # Currently we don't do any additional type validation here. # See https://github.com/deepset-ai/haystack/pull/2253#discussion_r815951591. diff --git a/haystack/preview/testing/document_store.py b/haystack/preview/testing/document_store.py index 9127fdab1c..e4ae2b6bb3 100644 --- a/haystack/preview/testing/document_store.py +++ b/haystack/preview/testing/document_store.py @@ -316,14 +316,13 @@ def test_nin_filter_embedding(self, docstore: DocumentStore, filterable_docs: Li result = docstore.filter_documents(filters={"embedding": {"$nin": [embedding_ones, embedding_zeros]}}) assert self.contains_same_docs( result, - [ - doc - for doc in filterable_docs + list( + filterable_docs or ( not np.array_equal(embedding_zeros, doc.embedding) # type: ignore and not np.array_equal(embedding_ones, doc.embedding) # type: ignore ) - ], + ), ) @pytest.mark.unit diff --git a/haystack/schema.py b/haystack/schema.py index a6547e5460..c4f7cbd1c2 100644 --- a/haystack/schema.py +++ b/haystack/schema.py @@ -1486,7 +1486,7 @@ def find_matched_label_idxs(row) -> List[int]: # pylint: disable=too-many-retur gold_document_ids = [id for id in gold_document_ids if id != "00"] num_labels = len(gold_document_ids) - num_matched_labels = len(set(idx for idxs in relevant_rows["matched_label_idxs"] for idx in idxs)) + num_matched_labels = len({idx for idxs in relevant_rows["matched_label_idxs"] for idx in idxs}) num_missing_labels = num_labels - num_matched_labels relevance_criterion_ids = list(relevant_rows["document_id"].values) diff --git a/haystack/testing/document_store.py b/haystack/testing/document_store.py index cba0d5950e..9e2f2ad928 100644 --- a/haystack/testing/document_store.py +++ b/haystack/testing/document_store.py @@ -73,8 +73,8 @@ def test_write_documents(self, ds, documents): ds.write_documents(documents) docs = ds.get_all_documents() assert len(docs) == len(documents) - expected_ids = set(doc.id for doc in documents) - ids = set(doc.id for doc in docs) + expected_ids = {doc.id for doc in documents} + ids = {doc.id for doc in docs} assert ids == expected_ids @pytest.mark.integration diff --git a/haystack/utils/context_matching.py b/haystack/utils/context_matching.py index f18980204f..80cfb1bd0b 100644 --- a/haystack/utils/context_matching.py +++ b/haystack/utils/context_matching.py @@ -147,7 +147,7 @@ def match_context( matches = (candidate for candidate in candidate_scores if candidate.score > threshold) sorted_matches = sorted(matches, key=lambda 
candidate: candidate.score, reverse=True) - match_list = list((candidate_score.candidate_id, candidate_score.score) for candidate_score in sorted_matches) + match_list = [(candidate_score.candidate_id, candidate_score.score) for candidate_score in sorted_matches] return match_list @@ -208,13 +208,13 @@ def match_contexts( if show_progress: candidate_scores = tqdm(candidate_scores) - match_lists: List[List[Tuple[str, float]]] = list() + match_lists: List[List[Tuple[str, float]]] = [] matches = (candidate for candidate in candidate_scores if candidate.score > threshold) group_sorted_matches = sorted(matches, key=lambda candidate: candidate.context_id) grouped_matches = groupby(group_sorted_matches, key=lambda candidate: candidate.context_id) for context_id, group in grouped_matches: sorted_group = sorted(group, key=lambda candidate: candidate.score, reverse=True) - match_list = list((candiate_score.candidate_id, candiate_score.score) for candiate_score in sorted_group) + match_list = [(candiate_score.candidate_id, candiate_score.score) for candiate_score in sorted_group] match_lists.insert(context_id, match_list) return match_lists diff --git a/haystack/utils/deepsetcloud.py b/haystack/utils/deepsetcloud.py index b215a22f12..24ab50b3c4 100644 --- a/haystack/utils/deepsetcloud.py +++ b/haystack/utils/deepsetcloud.py @@ -902,7 +902,7 @@ def get_evaluation_sets(self, workspace: Optional[str] = None) -> List[dict]: """ evaluation_sets_response = self._get_evaluation_sets(workspace=workspace) - return [eval_set for eval_set in evaluation_sets_response] + return list(evaluation_sets_response) def _get_evaluation_sets(self, workspace: Optional[str] = None) -> Generator: url = self._build_workspace_url(workspace=workspace) @@ -1166,7 +1166,7 @@ def get_eval_runs(self, workspace: Optional[str] = None, headers: Optional[dict] workspace_url = self._build_workspace_url(workspace) eval_run_url = f"{workspace_url}/eval_runs" response = self.client.get_with_auto_paging(eval_run_url, headers=headers) - return [eval_run for eval_run in response] + return list(response) def delete_eval_run(self, eval_run_name: str, workspace: Optional[str] = None, headers: Optional[dict] = None): """ @@ -1279,7 +1279,7 @@ def get_eval_run_predictions( workspace_url = self._build_workspace_url(workspace) eval_run_prediction_url = f"{workspace_url}/eval_runs/{eval_run_name}/nodes/{node_name}/predictions" response = self.client.get_with_auto_paging(eval_run_prediction_url, headers=headers) - return [prediction for prediction in response] + return list(response) def _build_workspace_url(self, workspace: Optional[str] = None): if workspace is None: diff --git a/haystack/utils/preprocessing.py b/haystack/utils/preprocessing.py index de80665dad..55b2cce9a3 100644 --- a/haystack/utils/preprocessing.py +++ b/haystack/utils/preprocessing.py @@ -34,7 +34,7 @@ def convert_files_to_docs( # Importing top-level causes a circular import from haystack.nodes.file_converter import BaseConverter, DocxToTextConverter, PDFToTextConverter, TextConverter - file_paths = [p for p in Path(dir_path).glob("**/*")] + file_paths = list(Path(dir_path).glob("**/*")) allowed_suffixes = [".pdf", ".txt", ".docx"] suffix2converter: Dict[str, BaseConverter] = {} @@ -115,7 +115,7 @@ def tika_convert_files_to_docs( logger.error("Tika not installed. Please install tika and try again. 
Error: %s", ex) raise ex converter = TikaConverter() - paths = [p for p in Path(dir_path).glob("**/*")] + paths = list(Path(dir_path).glob("**/*")) allowed_suffixes = [".pdf", ".txt"] file_paths: List[Path] = [] diff --git a/pyproject.toml b/pyproject.toml index 3f5497f49c..c6a1373324 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -381,6 +381,7 @@ plugins = [ select = [ "AIR", # Airflow "ASYNC", # flake8-async + "C4", # flake8-comprehensions "C90", # McCabe cyclomatic complexity "CPY", # flake8-copyright "DJ", # flake8-django @@ -389,6 +390,7 @@ select = [ "F", # Pyflakes "FURB", # refurb "INT", # flake8-gettext + "PERF", # Perflint "PL", # Pylint "Q", # flake8-quotes "SLOT", # flake8-slots @@ -398,7 +400,6 @@ select = [ # "E", # pycodestyle # "NPY", # NumPy-specific rules # "PD", # pandas-vet - # "PERF", # Perflint # "PT", # flake8-pytest-style # "UP", # pyupgrade ] @@ -406,6 +407,8 @@ line-length = 1486 target-version = "py38" ignore = [ "F401", # unused-import + "PERF401", # Use a list comprehension to create a transformed list + "PERF203", # `try`-`except` within a loop incurs performance overhead "PLR1714", # repeated-equality-comparison "PLR5501", # collapsible-else-if "PLW0603", # global-statement diff --git a/test/benchmarks/datadog/metric_handler.py b/test/benchmarks/datadog/metric_handler.py index 2b0c3afc9a..d56f8d4929 100644 --- a/test/benchmarks/datadog/metric_handler.py +++ b/test/benchmarks/datadog/metric_handler.py @@ -118,7 +118,7 @@ def __init__(self, datadog_api_key: str, datadog_host: str): def send_custom_dd_metric(self, metric: CustomDatadogMetric) -> dict: datadog.initialize(api_key=self.datadog_api_key, api_host=self.datadog_host) - tags: List[str] = list(map(lambda t: str(t.value), metric.tags)) + tags: List[str] = [str(t.value) for t in metric.tags] post_metric_response: Dict = datadog.api.Metric.send( metric=metric.name, points=[(metric.timestamp, metric.value)], tags=tags ) diff --git a/test/benchmarks/datadog/send_metrics.py b/test/benchmarks/datadog/send_metrics.py index fcaefa1f5c..54d7346a4d 100644 --- a/test/benchmarks/datadog/send_metrics.py +++ b/test/benchmarks/datadog/send_metrics.py @@ -107,7 +107,7 @@ def get_benchmark_type_tag(reader_tag, retriever_tag, document_store_tag): def collect_metrics_from_json_files(folder_path): benchmark_metrics = parse_benchmark_files(folder_path) metrics_to_send_to_dd = [] - for benchmark_name, metrics in benchmark_metrics.items(): + for metrics in benchmark_metrics.values(): indexing_metrics = metrics["indexing"] querying_metrics = metrics["querying"] config = metrics["config"] diff --git a/test/benchmarks/utils.py b/test/benchmarks/utils.py index cfbf659458..8ef0ae438c 100644 --- a/test/benchmarks/utils.py +++ b/test/benchmarks/utils.py @@ -175,7 +175,7 @@ def contains_reader(pipeline: Pipeline) -> bool: Check if a pipeline contains a Reader component. :param pipeline: Pipeline """ - components = [comp for comp in pipeline.components.values()] + components = list(pipeline.components.values()) return any(isinstance(comp, BaseReader) for comp in components) @@ -183,5 +183,5 @@ def contains_retriever(pipeline: Pipeline) -> bool: """ Check if a pipeline contains a Retriever component. 
""" - components = [comp for comp in pipeline.components.values()] + components = list(pipeline.components.values()) return any(isinstance(comp, BaseRetriever) for comp in components) diff --git a/test/document_stores/test_elasticsearch.py b/test/document_stores/test_elasticsearch.py index cd73198a89..adfdeb0a82 100644 --- a/test/document_stores/test_elasticsearch.py +++ b/test/document_stores/test_elasticsearch.py @@ -283,8 +283,8 @@ def test_elasticsearch_brownfield_support(self, ds, documents): assert all("name" in doc.meta for doc in transferred_documents) assert all(doc.id == doc._get_id(["content", "meta"]) for doc in transferred_documents) - original_content = set([doc.content for doc in original_documents]) - transferred_content = set([doc.content for doc in transferred_documents]) + original_content = {doc.content for doc in original_documents} + transferred_content = {doc.content for doc in transferred_documents} assert original_content == transferred_content # Test transferring docs with PreProcessor diff --git a/test/nodes/test_file_converter.py b/test/nodes/test_file_converter.py index 53c726b313..d1bbb2fc1b 100644 --- a/test/nodes/test_file_converter.py +++ b/test/nodes/test_file_converter.py @@ -394,7 +394,7 @@ def test_id_hash_keys_from_pipeline_params(samples_path): converter = TextConverter() output, _ = converter.run(file_paths=[doc_path, doc_path], meta=meta, id_hash_keys=["content", "meta"]) documents = output["documents"] - unique_ids = set(d.id for d in documents) + unique_ids = {d.id for d in documents} assert len(documents) == 2 assert len(unique_ids) == 2 diff --git a/test/nodes/test_preprocessor.py b/test/nodes/test_preprocessor.py index 9b3f9c8d48..222841a54f 100644 --- a/test/nodes/test_preprocessor.py +++ b/test/nodes/test_preprocessor.py @@ -237,7 +237,7 @@ def test_id_hash_keys_from_pipeline_params(): preprocessor = PreProcessor(split_length=2, split_respect_sentence_boundary=False) output, _ = preprocessor.run(documents=[document_1, document_2], id_hash_keys=["content", "meta"]) documents = output["documents"] - unique_ids = set(d.id for d in documents) + unique_ids = {d.id for d in documents} assert len(documents) == 4 assert len(unique_ids) == 4 diff --git a/test/nodes/test_reader.py b/test/nodes/test_reader.py index bb77e57e41..c97bf418f5 100644 --- a/test/nodes/test_reader.py +++ b/test/nodes/test_reader.py @@ -160,7 +160,7 @@ def test_deduplication_for_overlapping_documents(reader): prediction = reader.predict(query="Where does Carla live?", documents=docs, top_k=5) # Check that there are no duplicate answers - assert len(set(ans.answer for ans in prediction["answers"])) == len(prediction["answers"]) + assert len({ans.answer for ans in prediction["answers"]}) == len(prediction["answers"]) @pytest.mark.integration diff --git a/test/nodes/test_shaper.py b/test/nodes/test_shaper.py index 4c4fc3923e..4d494de052 100644 --- a/test/nodes/test_shaper.py +++ b/test/nodes/test_shaper.py @@ -937,7 +937,7 @@ def test_strings_to_answers_after_prompt_node_yaml(tmp_path): ) results = result["answers"] assert len(results) == 4 - assert any([True for r in results if "Berlin" in r.answer]) + assert any(True for r in results if "Berlin" in r.answer) for answer in results[:2]: assert answer.document_ids == ["123"] assert ( @@ -1527,7 +1527,7 @@ def test_with_multiple_prompt_nodes(tmp_path): ) results = result["answers"] assert len(results) == 2 - assert any([True for r in results if "Berlin" in r.answer]) + assert any(True for r in results if "Berlin" in r.answer) 
@pytest.mark.unit diff --git a/test/nodes/test_web_search.py b/test/nodes/test_web_search.py index 70c3c9c0f5..9e9ce1f9dd 100644 --- a/test/nodes/test_web_search.py +++ b/test/nodes/test_web_search.py @@ -39,7 +39,7 @@ def test_web_search_with_site_keyword(): assert len(result["documents"]) > 0 assert isinstance(result["documents"][0], Document) assert all( - ["nasa" in doc.meta["link"] or "lifewire" in doc.meta["link"] for doc in result["documents"]] + "nasa" in doc.meta["link"] or "lifewire" in doc.meta["link"] for doc in result["documents"] ), "Some documents are not from the specified sites lifewire.com or nasa.gov." diff --git a/test/others/test_utils.py b/test/others/test_utils.py index 31690c7962..c3382b8dd1 100644 --- a/test/others/test_utils.py +++ b/test/others/test_utils.py @@ -546,7 +546,7 @@ def test_list_files_on_deepset_cloud(): ) client = DeepsetCloud.get_file_client(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY) - files = [f for f in client.list_files()] + files = list(client.list_files()) assert len(files) == 2 assert files[0]["name"] == "sample_pdf_1.pdf" assert files[1]["name"] == "sample_pdf_2.pdf" diff --git a/test/pipelines/test_eval.py b/test/pipelines/test_eval.py index 73aa2ae6bd..19b952c14d 100644 --- a/test/pipelines/test_eval.py +++ b/test/pipelines/test_eval.py @@ -537,10 +537,10 @@ def test_extractive_qa_eval(reader, retriever_with_docs, tmp_path, eval_labels): # all expected columns are part of the evaluation result dataframe assert sorted(expected_reader_result_columns + expected_generic_result_columns + ["index"]) == sorted( - list(reader_result.columns) + reader_result.columns ) assert sorted(expected_retriever_result_columns + expected_generic_result_columns + ["index"]) == sorted( - list(retriever_result.columns) + retriever_result.columns ) assert ( @@ -676,10 +676,10 @@ def test_generative_qa_eval(retriever_with_docs, tmp_path, eval_labels): # all expected columns are part of the evaluation result dataframe assert sorted(expected_generator_result_columns + expected_generic_result_columns + ["index"]) == sorted( - list(generator_result.columns) + generator_result.columns ) assert sorted(expected_retriever_result_columns + expected_generic_result_columns + ["index"]) == sorted( - list(retriever_result.columns) + retriever_result.columns ) assert generator_result["prompt"].iloc[0] is not None @@ -777,10 +777,10 @@ def test_generative_qa_w_promptnode_eval(retriever_with_docs, tmp_path, eval_lab # all expected columns are part of the evaluation result dataframe assert sorted(expected_generator_result_columns + expected_generic_result_columns + ["index"]) == sorted( - list(generator_result.columns) + generator_result.columns ) assert sorted(expected_retriever_result_columns + expected_generic_result_columns + ["index"]) == sorted( - list(retriever_result.columns) + retriever_result.columns ) assert generator_result["prompt"].iloc[0] is not None diff --git a/test/prompt/test_prompt_template.py b/test/prompt/test_prompt_template.py index cf8254d3d4..78c458a17a 100644 --- a/test/prompt/test_prompt_template.py +++ b/test/prompt/test_prompt_template.py @@ -345,7 +345,7 @@ def test_prompt_template_syntax_fill( self, prompt_text: str, documents: List[Document], query: str, expected_prompts: List[str] ): prompt_template = PromptTemplate(prompt_text) - prompts = [prompt for prompt in prompt_template.fill(documents=documents, query=query)] + prompts = list(prompt_template.fill(documents=documents, query=query)) assert prompts == expected_prompts @pytest.mark.unit 
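The prompt-template and shaper hunks above show two related shapes: list(iterable) to materialize a generator, and any(condition for x in xs) in place of any([condition for x in xs]). The second is more than style — a generator argument lets any() short-circuit on the first truthy element, while the list form evaluates the predicate for every element before any() even starts. A small illustrative sketch (the data is made up for the demo):

    import timeit

    results = list(range(1_000_000))

    # any() over a generator stops at the first hit (element 11 here);
    # any() over a list comprehension builds and scans the whole list first.
    with_list = timeit.timeit(lambda: any([r > 10 for r in results]), number=20)
    with_gen = timeit.timeit(lambda: any(r > 10 for r in results), number=20)
    print(f"any(list): {with_list:.4f}s  any(gen): {with_gen:.4f}s")

    # list(iterable) is the canonical way to materialize a generator,
    # replacing copies like [prompt for prompt in template.fill(...)].
    gen = (r * 2 for r in range(5))
    assert list(gen) == [0, 2, 4, 6, 8]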
@@ -372,7 +372,7 @@ def test_prompt_template_syntax_fill( ) def test_join(self, prompt_text: str, documents: List[Document], expected_prompts: List[str]): prompt_template = PromptTemplate(prompt_text) - prompts = [prompt for prompt in prompt_template.fill(documents=documents)] + prompts = list(prompt_template.fill(documents=documents)) assert prompts == expected_prompts @pytest.mark.unit @@ -405,7 +405,7 @@ def test_join(self, prompt_text: str, documents: List[Document], expected_prompt ) def test_to_strings(self, prompt_text: str, documents: List[Document], expected_prompts: List[str]): prompt_template = PromptTemplate(prompt_text) - prompts = [prompt for prompt in prompt_template.fill(documents=documents)] + prompts = list(prompt_template.fill(documents=documents)) assert prompts == expected_prompts @pytest.mark.unit @@ -466,7 +466,7 @@ def test_prompt_template_syntax_fill_ignores_dangerous_input( self, prompt_text: str, documents: List[Document], query: str, expected_prompts: List[str] ): prompt_template = PromptTemplate(prompt_text) - prompts = [prompt for prompt in prompt_template.fill(documents=documents, query=query)] + prompts = list(prompt_template.fill(documents=documents, query=query)) assert prompts == expected_prompts def test_prompt_template_remove_template_params(self): From 564556a509a7b628b06b5c0f79370d9225e20513 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Fri, 15 Sep 2023 18:48:20 +0200 Subject: [PATCH 2/6] pre-commit fixes --- .pre-commit-config.yaml | 2 +- releasenotes/notes/refactor-pinecone-document-store.yaml | 6 +++--- .../support-azure-3.5-gpt-16k-model-ece0cfe03260748c.yaml | 1 - 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 23464f67fa..a973508ad8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.0.289 hooks: - - id: ruff + - id: ruff - repo: https://github.com/codespell-project/codespell rev: v2.2.5 diff --git a/releasenotes/notes/refactor-pinecone-document-store.yaml b/releasenotes/notes/refactor-pinecone-document-store.yaml index d67d134a34..b8145ac50b 100644 --- a/releasenotes/notes/refactor-pinecone-document-store.yaml +++ b/releasenotes/notes/refactor-pinecone-document-store.yaml @@ -1,6 +1,6 @@ --- enhancements: - | - Refactor PineconeDocumentStore to use metadata instead of namespaces - for distinction between documents with embeddings, documents without - embeddings and labels \ No newline at end of file + Refactor PineconeDocumentStore to use metadata instead of namespaces + for distinction between documents with embeddings, documents without + embeddings and labels diff --git a/releasenotes/notes/support-azure-3.5-gpt-16k-model-ece0cfe03260748c.yaml b/releasenotes/notes/support-azure-3.5-gpt-16k-model-ece0cfe03260748c.yaml index 642831b752..9fc499418c 100644 --- a/releasenotes/notes/support-azure-3.5-gpt-16k-model-ece0cfe03260748c.yaml +++ b/releasenotes/notes/support-azure-3.5-gpt-16k-model-ece0cfe03260748c.yaml @@ -2,4 +2,3 @@ fixes: - | gpt-35-turbo-16k model from Azure can integrate correctly - From 6788fc908f7fb1979a992ddbb732bef6ded3b5e8 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Fri, 15 Sep 2023 19:14:04 +0200 Subject: [PATCH 3/6] Revert changes to examples/basic_qa_pipeline.py --- examples/basic_qa_pipeline.py | 15 +++++++-------- pyproject.toml | 1 + 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/basic_qa_pipeline.py 
b/examples/basic_qa_pipeline.py index b58e0ccfe9..9e90114d0e 100644 --- a/examples/basic_qa_pipeline.py +++ b/examples/basic_qa_pipeline.py @@ -1,17 +1,16 @@ import logging from pathlib import Path +logging.basicConfig(format="%(levelname)s - %(name)s - %(message)s", level=logging.WARNING) +logging.getLogger("haystack").setLevel(logging.INFO) + from haystack.document_stores import ElasticsearchDocumentStore -from haystack.nodes import BM25Retriever, FARMReader +from haystack.utils import fetch_archive_from_http, print_answers, launch_es +from haystack.nodes import FARMReader, BM25Retriever from haystack.nodes.file_classifier import FileTypeClassifier -from haystack.nodes.file_converter import TextConverter from haystack.nodes.preprocessor import PreProcessor +from haystack.nodes.file_converter import TextConverter from haystack.pipelines import Pipeline -from haystack.utils import fetch_archive_from_http, launch_es, print_answers - -# pylint: disable=no-logging-basicconfig -logging.basicConfig(format="%(levelname)s - %(name)s - %(message)s", level=logging.WARNING) -logging.getLogger("haystack").setLevel(logging.INFO) def basic_qa_pipeline(): @@ -23,7 +22,7 @@ def basic_qa_pipeline(): s3_url = "https://core-engineering.s3.eu-central-1.amazonaws.com/public/scripts/wiki_gameofthrones_txt1.zip" fetch_archive_from_http(url=s3_url, output_dir=doc_dir) - file_paths = list(Path(doc_dir).glob("**/*")) + file_paths = [p for p in Path(doc_dir).glob("**/*")] files_metadata = [{"name": path.name} for path in file_paths] # Indexing Pipeline diff --git a/pyproject.toml b/pyproject.toml index c6a1373324..e4ab823355 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -420,6 +420,7 @@ ignore = [ max-complexity = 28 [tool.ruff.per-file-ignores] +"examples/basic_qa_pipeline.py" = ["C416"] "haystack/preview/testing/document_store.py" = ["F821"] "haystack/telemetry.py" = ["F821"] From 97c4c2cf7702cee090bf7f804c24474399752faa Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Fri, 15 Sep 2023 19:36:09 +0200 Subject: [PATCH 4/6] Revert changes to haystack/preview/testing/document_store.py --- haystack/preview/testing/document_store.py | 7 ++++--- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/haystack/preview/testing/document_store.py b/haystack/preview/testing/document_store.py index e4ae2b6bb3..9127fdab1c 100644 --- a/haystack/preview/testing/document_store.py +++ b/haystack/preview/testing/document_store.py @@ -316,13 +316,14 @@ def test_nin_filter_embedding(self, docstore: DocumentStore, filterable_docs: Li result = docstore.filter_documents(filters={"embedding": {"$nin": [embedding_ones, embedding_zeros]}}) assert self.contains_same_docs( result, - list( - filterable_docs + [ + doc + for doc in filterable_docs or ( not np.array_equal(embedding_zeros, doc.embedding) # type: ignore and not np.array_equal(embedding_ones, doc.embedding) # type: ignore ) - ), + ], ) @pytest.mark.unit diff --git a/pyproject.toml b/pyproject.toml index e4ab823355..144473722b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -421,7 +421,7 @@ max-complexity = 28 [tool.ruff.per-file-ignores] "examples/basic_qa_pipeline.py" = ["C416"] -"haystack/preview/testing/document_store.py" = ["F821"] +"haystack/preview/testing/document_store.py" = ["C416", "F821"] "haystack/telemetry.py" = ["F821"] [tool.ruff.pylint] From c884e2552341a3c2d103c70aa6d7b79ddc4709c7 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Sat, 16 Sep 2023 00:43:06 +0200 Subject: [PATCH 5/6] revert releasenotes --- 
releasenotes/notes/refactor-pinecone-document-store.yaml | 6 +++--- .../support-azure-3.5-gpt-16k-model-ece0cfe03260748c.yaml | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/releasenotes/notes/refactor-pinecone-document-store.yaml b/releasenotes/notes/refactor-pinecone-document-store.yaml index b8145ac50b..d67d134a34 100644 --- a/releasenotes/notes/refactor-pinecone-document-store.yaml +++ b/releasenotes/notes/refactor-pinecone-document-store.yaml @@ -1,6 +1,6 @@ --- enhancements: - | - Refactor PineconeDocumentStore to use metadata instead of namespaces - for distinction between documents with embeddings, documents without - embeddings and labels + Refactor PineconeDocumentStore to use metadata instead of namespaces + for distinction between documents with embeddings, documents without + embeddings and labels \ No newline at end of file diff --git a/releasenotes/notes/support-azure-3.5-gpt-16k-model-ece0cfe03260748c.yaml b/releasenotes/notes/support-azure-3.5-gpt-16k-model-ece0cfe03260748c.yaml index 9fc499418c..642831b752 100644 --- a/releasenotes/notes/support-azure-3.5-gpt-16k-model-ece0cfe03260748c.yaml +++ b/releasenotes/notes/support-azure-3.5-gpt-16k-model-ece0cfe03260748c.yaml @@ -2,3 +2,4 @@ fixes: - | gpt-35-turbo-16k model from Azure can integrate correctly + From 40f3efde4e21f090b17983f67f0bea3ba83398a1 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Sat, 16 Sep 2023 09:19:12 +0200 Subject: [PATCH 6/6] Upgrade to ruff v0.0.290 --- .pre-commit-config.yaml | 2 +- haystack/nodes/other/join_docs.py | 2 +- haystack/nodes/retriever/sparse.py | 5 +---- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a973508ad8..5375c94d75 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,7 +22,7 @@ repos: - id: black-jupyter - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.289 + rev: v0.0.290 hooks: - id: ruff diff --git a/haystack/nodes/other/join_docs.py b/haystack/nodes/other/join_docs.py index 27761535ce..4185873a7c 100644 --- a/haystack/nodes/other/join_docs.py +++ b/haystack/nodes/other/join_docs.py @@ -82,7 +82,7 @@ def run_accumulated(self, inputs: List[dict], top_k_join: Optional[int] = None): "score would be `-infinity`." ) else: - sorted_docs = [(k, v) for k, v in scores_map.items()] + sorted_docs = list(scores_map.items()) if not top_k_join: top_k_join = self.top_k_join diff --git a/haystack/nodes/retriever/sparse.py b/haystack/nodes/retriever/sparse.py index 61f7b0c8d0..c32dddf0a6 100644 --- a/haystack/nodes/retriever/sparse.py +++ b/haystack/nodes/retriever/sparse.py @@ -457,10 +457,7 @@ def _get_all_paragraphs(self, document_store: BaseDocumentStore, index: Optional def _calc_scores(self, queries: List[str], index: str) -> List[Dict[int, float]]: question_vector = self.vectorizer.transform(queries) doc_scores_per_query = self.tfidf_matrices[index].dot(question_vector.T).T.toarray() - doc_scores_per_query = [ - [(doc_idx, doc_score) for doc_idx, doc_score in enumerate(doc_scores)] - for doc_scores in doc_scores_per_query - ] + doc_scores_per_query = [list(enumerate(doc_scores)) for doc_scores in doc_scores_per_query] indices_and_scores: List[Dict] = [ OrderedDict(sorted(query_idx_scores, key=lambda tup: tup[1], reverse=True)) for query_idx_scores in doc_scores_per_query
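
Three of the less obvious rewrites in this series, restated as a standalone sketch with hypothetical data: dict views are already set-like, so set(list(d.keys())) makes two needless copies before taking the difference; enumerate() yields exactly the (index, value) tuples that sparse.py previously rebuilt with a comprehension; and hoisting try/except out of a loop (PERF203) removes per-iteration handler setup but can change behavior, which is presumably why PERF203 sits on the ignore list in pyproject.toml.

    # Dict views support set operations directly, replacing
    # set(list(dev_labels.keys())) - set(list(predictions.keys())).
    dev_labels = {"q1": "a", "q2": "b", "q3": "c"}
    predictions = {"q1": "a", "q3": "c"}
    assert dev_labels.keys() - predictions.keys() == {"q2"}

    # enumerate() already yields (index, value) pairs, so
    # [(i, s) for i, s in enumerate(scores)] is just list(enumerate(scores)).
    scores = [0.2, 0.9, 0.5]
    ranked = sorted(enumerate(scores), key=lambda t: t[1], reverse=True)
    assert ranked[0] == (1, 0.9)

    # PERF203: a handler outside the loop is entered once, not per iteration.
    # Note the changed semantics: the loop now aborts at the first bad item
    # instead of skipping it, so this rewrite is not always safe to automate.
    items = ["1", "2", "x", "4"]
    parsed = []
    try:
        for item in items:
            parsed.append(int(item))
    except ValueError:
        pass
    assert parsed == [1, 2]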