From 666a7ffcc24a6ae3e81aacb275493b1c28aeb817 Mon Sep 17 00:00:00 2001 From: Mike <45373284+munkhuushmgl@users.noreply.github.com> Date: Wed, 2 Dec 2020 14:26:12 -0800 Subject: [PATCH] fix: added if statement to filter out dir blob files (#63) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #62 🦕 The current version of the sample doesn't check whether a blob is a directory or a .json file. It then downloads the blob as bytes and tries to parse JSON from the directory blob, which causes an error. --- batch_process_documents_sample_v1beta3.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/batch_process_documents_sample_v1beta3.py b/batch_process_documents_sample_v1beta3.py index 6e22e0ea2ed8..ea6c01e31f93 100644 --- a/batch_process_documents_sample_v1beta3.py +++ b/batch_process_documents_sample_v1beta3.py @@ -78,9 +78,12 @@ def batch_process_documents( for i, blob in enumerate(blob_list): # Download the contents of this blob as a bytes object. + if ".json" not in blob.name: + return + # Only parses JSON files blob_as_bytes = blob.download_as_bytes() - document = documentai.types.Document.from_json(blob_as_bytes) + document = documentai.types.Document.from_json(blob_as_bytes) print(f"Fetched file {i + 1}") # For a full list of Document object attributes, please reference this page: https://googleapis.dev/python/documentai/latest/_modules/google/cloud/documentai_v1beta3/types/document.html#Document