MicheleCotrufo · MicheleCotrufo · Jun 8, 2024 · Jun 7, 2024 · Jun 7, 2024 · Jun 7, 2024
diff --git a/pdf2bib/bibtex_makers.py b/pdf2bib/bibtex_makers.py
@@ -178,7 +178,9 @@ def make_bibtex(metadata):
     id = id.lower()
     id = remove_latex_codes(id)
     id = unidecode(id) #This makes sure that the id of the bibtex entry is only made out of ascii characters (i.e. no accents, tildes, etc.)
-    id = re.sub('-|,', '', id) #Make sure to remove any possible hyphen and comma
+    id = re.sub(
+        "-|,|:|'|\\n", "", id
+    ) # Make sure to remove any possible hyphen, comma, colon, single quote, and newline.
     if id == '':
         id = 'NoValidID'
 
@@ -215,4 +217,4 @@ def remove_latex_codes(text):
     #This regex looks for any substring that matches the pattern "{\string1{string2}}" where string1 can be anything,
     #and it replaces the whole substring by string2
     text_sanitized = re.sub(r"{\\[^\{]+{([\w]+)}}", r"\1",text)
-    return text_sanitized
+    return text_sanitized
diff --git a/pdf2bib/main.py b/pdf2bib/main.py
@@ -46,6 +46,9 @@ def pdf2bib(target):
     # Setup logging
     logger = logging.getLogger("pdf2bib")
 
+    # Make sure the path is a string in case a pathlib.Path object is provided
+    target = str(target)
+
     #Check if path is valid
     if not(path.exists(target)):
         logger.error(f"{target} is not a valid path to a file or a directory.")
@@ -135,9 +138,18 @@ def pdf2bib_singlefile(filename):
 
     logger.info(f"pdf2doi found a valid identifier for this paper.") 
 
-    if result['identifier_type'] in ['arxiv ID','arxiv DOI']:
+    if result["identifier_type"] == "arxiv ID":
+        logger.info(f"Parsing the info returned by export.arxiv.org...")
+        metadata = bibtex_makers.parse_bib_from_exportarxivorg(
+            result["validation_info"]
+        )
+    elif result["identifier_type"] == "arxiv DOI":
+        if "arxiv_doi" not in result["validation_info"]:
+            result["validation_info"]["arxiv_doi"] = result["identifier"]
         logger.info(f"Parsing the info returned by export.arxiv.org...")
-        metadata = bibtex_makers.parse_bib_from_exportarxivorg(result['validation_info'])
+        metadata = bibtex_makers.parse_bib_from_exportarxivorg(
+            result["validation_info"]
+        )
     elif result['identifier_type'] == 'DOI':
         logger.info(f"Parsing the info returned by dx.doi.org...")
         metadata = bibtex_makers.parse_bib_from_dxdoiorg(result['validation_info'], method=pdf2doi.config.get('method_dxdoiorg'))
@@ -303,4 +315,4 @@ def main():
     return
 
 if __name__ == '__main__':
-    main()
+    main()