From 41606e2b618701fde0a6e681bdfb201c4f456814 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Sat, 2 Dec 2023 11:58:45 -0600 Subject: [PATCH 1/5] Fixes pylint errors for tests directory --- tests/conftest.py | 2 +- tests/test_documents.py | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index bbcc93d..f8e81e5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -142,7 +142,7 @@ def make_document(pdf=DEFAULT_DOCUMENT_URI, **kwargs): def project(client, document_factory): with vcr.use_cassette("tests/cassettes/fixtures/project.yaml"): document = document_factory() - title = "This is a project for testing {}".format(uuid4()) + title = f"This is a project for testing {uuid4()}" project = client.projects.create( title, "This is a project for testing", document_ids=[document.id] ) diff --git a/tests/test_documents.py b/tests/test_documents.py index d48868b..990d97a 100644 --- a/tests/test_documents.py +++ b/tests/test_documents.py @@ -75,7 +75,7 @@ def test_dir(self, document, attr): def test_mentions(self, client, document): document = client.documents.search( - "document:{} text".format(document.id), mentions="true" + f"document:{document.id} text", mentions="true" )[0] assert document.mentions mention = document.mentions[0] @@ -158,7 +158,9 @@ def test_section(self, document_factory): class TestDocumentClient: def test_search(self, client, document): - documents = client.documents.search("document:{} simple".format(document.id)) + documents = client.documents.search( + f"document:{document.id} simple" + ) assert documents def test_list(self, client): @@ -176,10 +178,11 @@ def test_public_upload(self, public_client): public_client.documents.upload("tests/test.pdf") def test_upload_file(self, document_factory): - pdf = open("tests/test.pdf", "rb") - document = document_factory(pdf) + with open("tests/test.pdf", "rb") as pdf: + document = 
document_factory(pdf) assert document.status == "success" + def test_upload_file_path(self, document_factory): document = document_factory("tests/test.pdf") assert document.status == "success" From 6b81ca0fa0d61ece4a95d253a622653b636c0d43 Mon Sep 17 00:00:00 2001 From: Sanjin <102841251+duckduckgrayduck@users.noreply.github.com> Date: Sat, 2 Dec 2023 12:03:35 -0600 Subject: [PATCH 2/5] Run pylint & black on ./tests as well --- .github/workflows/main.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 0e73d8b..e0b0e11 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -51,7 +51,6 @@ jobs: run: | pip install pylint black - - name: Run pylint and black + - name: Run pylint and black on ./documentcloud and ./tests run: | - pylint ./documentcloud - black ./documentcloud + pylint ./documentcloud ./tests; black ./documentcloud ./tests From 7e199aa40564a4109fad3f52b19a22ec444b69d0 Mon Sep 17 00:00:00 2001 From: Sanjin <102841251+duckduckgrayduck@users.noreply.github.com> Date: Sat, 2 Dec 2023 12:05:56 -0600 Subject: [PATCH 3/5] Update main.yml --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e0b0e11..e70dfb2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -45,7 +45,7 @@ jobs: - name: Install dependencies for imports run: | - pip install python-dateutil requests urllib3 fastjsonschema ratelimit listcrunch pyyaml + pip install python-dateutil requests urllib3 fastjsonschema ratelimit listcrunch pyyaml pytest vcrpy - name: Install pylint and black run: | From 2e346c7171a8d97d9829adfaf3aacf9d54864756 Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Sat, 2 Dec 2023 12:13:38 -0600 Subject: [PATCH 4/5] Bumps version --- docs/changelog.rst | 4 ++++ docs/conf.py | 2 +- setup.py | 
2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index aeec612..fab318e 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,5 +1,9 @@ Changelog --------- +4.0.1 +~~~~~ +* Reformats some strings in tests to conform to pylint standards. + 4.0.0 ~~~~~ diff --git a/docs/conf.py b/docs/conf.py index ab6c506..93dab48 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -57,7 +57,7 @@ # The short X.Y version. version = "4.0" # The full version, including alpha/beta/rc tags. -release = "4.0.0" +release = "4.0.1" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/setup.py b/setup.py index d8549f3..fe9dc50 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="python-documentcloud", - version="4.0.0", + version="4.0.1", description="A simple Python wrapper for the DocumentCloud API", author="Mitchell Kotler", author_email="mitch@muckrock.com", From 3e10f297041089fc9bd4486de41cdd835b56626c Mon Sep 17 00:00:00 2001 From: duckduckgrayduck <102841251+duckduckgrayduck@users.noreply.github.com> Date: Mon, 11 Dec 2023 16:43:45 -0600 Subject: [PATCH 5/5] Remove Python 2 code from docs, add other examples to getting started --- docs/documents.rst | 6 +++--- docs/gettingstarted.rst | 43 ++++++++++++++++++++++++++++++----------- docs/projects.rst | 12 ++++++------ 3 files changed, 41 insertions(+), 20 deletions(-) diff --git a/docs/documents.rst b/docs/documents.rst index 9f17a15..cb32935 100644 --- a/docs/documents.rst +++ b/docs/documents.rst @@ -155,11 +155,11 @@ Document >>> # Grab a document >>> obj = client.documents.get('71072') - >>> print obj.title + >>> print(obj.title) Draft OIR Report >>> # Change its title >>> obj.title = "Brand new title" - >>> print obj.title + >>> print(obj.title) Brand New Title >>> # Save those changes >>> obj.put() @@ -282,7 +282,7 @@ Document >>> obj = client.documents.get('1088501-adventuretime-alta') 
>>> txt = obj.get_page_text(1) # Let's print just the first line - >>> print txt.decode().split("\n")[0] + >>> print(txt.split("\n")[0]) STATE OF CALIFORNIA- HEALTH AND HUMAN SERVICES AGENCY .. method:: get_page_position_json(page) diff --git a/docs/gettingstarted.rst b/docs/gettingstarted.rst index ccbc8cb..4137a4d 100644 --- a/docs/gettingstarted.rst +++ b/docs/gettingstarted.rst @@ -26,7 +26,7 @@ You can also specify a custom uri if you have installed your own version of Docu >>> client = DocumentCloud(USERNAME, PASSWORD, base_uri="https://your.documentcloud.domain/api/", auth_uri="https://your.account.server.domain/api/") -If you need to debug, you can pass a logging level as a parameter to the client when you instantiate. You will need to import logging first. There are several `logging levels `_ depending on your needs. For this example, we will use the DEBUG level. +If you need to debug, you can pass a logging level as a parameter to the client when you instantiate. You will need to import logging first. There are several `logging levels `_ depending on your needs. For this example, we will use the DEBUG level. :: >>> import logging >>> client = DocumentCloud(USERNAME, PASSWORD, loglevel=logging.DEBUG) @@ -47,13 +47,13 @@ Interacting with a document Once you have you hands on a document object, you can interact with the metadata stored at documentcloud.org. Here's a sample: :: - >>> print obj.title + >>> print(obj.title) Final OIR Report - >>> print obj.id + >>> print(obj.id) 71072 - >>> print obj.contributor_organization + >>> print(obj.contributor_organization) Los Angeles Times - >>> print obj.canonical_url + >>> print(obj.canonical_url) http://www.documentcloud.org/documents/71072-oir-final-report.html You can even download the PDF, page images and full text. 
:: @@ -92,7 +92,7 @@ Uploading a document that is not a PDF You can upload a document whose file extension is one of the seventy supported filetypes by including the original_extension parameter (See https://www.documentcloud.org/help/api#supported-file-types for supported filetypes) - Example: Uploading a JPG file that is stored in your home directory. + Example: Uploading a JPG file that is stored in your home directory. :: >>> obj = self.client.documents.upload("~/test.jpg", original_extension='jpg') @@ -108,7 +108,7 @@ First upload the document as normal. :: >>> from documentcloud import DocumentCloud >>> client = DocumentCloud(DOCUMENTCLOUD_USERNAME, DOCUMENTCLOUD_PASSWORD) >>> obj = client.documents.upload("/home/ben/pdfs/myfile.pdf", access='public') - + Then refresh your local document object from the server. If it is does not show up as public, then it is still processing, and you'll have to check again. :: >>> obj = client.documents.get(obj.id) @@ -119,7 +119,7 @@ Then refresh your local document object from the server. If it is does not show Uploading a directory of documents as a project ----------------------------------------------- -Here's how to upload a directory full of documents and add them all to a new project. Be warned, this will upload any documents in directories inside the path you specify. :: +Here's how to upload a directory full of PDFs and add them all to a new project. Be warned, this will upload any documents in directories inside the path you specify. :: >>> # Connect to documentcloud >>> from documentcloud import DocumentCloud @@ -133,10 +133,19 @@ Here's how to upload a directory full of documents and add them all to a new pro >>> # Save the changes to the project >>> project.put() +If you want to upload a directory of other file types, you can specify the extensions you want. +For example, the following will upload all .txt and .jpg files in the groucho_marx directory. 
:: + >>> obj_list = client.documents.upload_directory('/home/ben/pdfs/groucho_marx/', extensions = ['.txt', '.jpg']) + +If you pass extensions=None it will upload all files that DocumentCloud supports, regardless of extension type. +For example, the following will upload all files that are supported by DocumentCloud in the groucho_marx directory. :: + >>> obj_list = client.documents.upload_directory('/home/ben/pdfs/groucho_marx/', extensions=None) + + Uploading a PDF from a URL -------------------------- -How to read a PDF document from a URL on the World Wide Web and upload it to DocumentCloud without saving it to your local hard drive. +You can upload a PDF from a remote URL in the following way. :: >>> from documentcloud import DocumentCloud >>> url = "http://myhost.org/interesting-doc.pdf" @@ -146,11 +155,23 @@ How to read a PDF document from a URL on the World Wide Web and upload it to Doc Uploading a document with a different supported file type from URL --------------------------- -Here is an example of how to read a document with another supported file type from a URL and upload it to DocumentCloud without saving it to your local hard drive. +------------------------------------------------------------------ + +You can specify the original_extension on upload to handle other extension types. :: >>> from documentcloud import DocumentCloud >>> url = "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png" >>> client = DocumentCloud(DOCUMENTCLOUD_USERNAME, DOCUMENTCLOUD_PASSWORD) >>> # Upload the specified URL to the given client >>> obj = client.documents.upload(url, original_extension='png') + +Upload a list of URLs as documents to DocumentCloud +--------------------------------------------------- + +If you are trying to upload a lot of URLs regularly, there is a bulk method to upload them 25 at a time - upload_urls(). 
:: + + >>> urls = ["https://www.chicago.gov/content/dam/city/depts/dcd/tif/22reports/T_072_24thMichiganAR22.pdf", "https://www.chicago.gov/content/dam/city/depts/dcd/tif/22reports/T_063_CanalCongressAR22.pdf"] + >>> new = client.documents.upload_urls(urls) + >>> new + [, ] + diff --git a/docs/projects.rst b/docs/projects.rst index 8805323..eee2066 100644 --- a/docs/projects.rst +++ b/docs/projects.rst @@ -26,7 +26,7 @@ ProjectClient .. method:: create(title, description="", private=True, document_ids=None) Create a new project on DocumentCloud. You must be authorized to do this. - Returns the object representing the new record you've created. + Returns the object representing the new record you've created. :: >>> from documentcloud import DocumentCloud >>> client = DocumentCloud(USERNAME, PASSWORD) @@ -56,7 +56,7 @@ ProjectClient must be authorized to do this. Returns a tuple. An object representing the record comes first. A boolean that reports whether or not the objects was created fresh comes second. It is true when the record was created, false - when it was found on the site already. + when it was found on the site already. :: >>> from documentcloud import DocumentCloud >>> client = DocumentCloud(USERNAME, PASSWORD) @@ -119,7 +119,7 @@ Project .. method:: clear_documents() - Removes all documents from a project. + Removes all documents from a project. :: >>> obj = client.projects.get('816') >>> obj.clear_documents() @@ -127,7 +127,7 @@ Project .. method:: add_documents() Efficiently adds a lot of documents to a project. - Adds the documents 25 at a time using bulk API calls. + Adds the documents 25 at a time using bulk API calls. :: >>> documents_to_add = [client.documents.get('23745990'), client.documents.get('23745988')] >>> obj = client.projects.get('816') @@ -145,7 +145,7 @@ Project .. attribute:: document_ids A list that contains the unique identifier of the documents assigned to - this project. Cannot be edited. Edit the document_list instead. 
+ this project. Cannot be edited. Edit the document_list instead. :: >>> obj = client.projects.get('816') >>> obj.document_ids @@ -155,7 +155,7 @@ Project A list that documents assigned to this project. Can be expanded by appending new documents to the list or cleared by reassigning it as an - empty list and then issuing the put command. + empty list and then issuing the put command. :: >>> obj = client.projects.get('816') >>> obj.document_list