Merge pull request #16 from gurugaurav/1.1.0

1.1.0
gurugaurav · May 28, 2021 · 296c125 · 296c125
2 parents 8726c9d + dc3666d
commit 296c125
Show file tree

Hide file tree

Showing 9 changed files with 60 additions and 114 deletions.
diff --git a/.gitignore b/.gitignore
@@ -3,4 +3,4 @@ dataset/*
 dataset
 dist
 image_search.egg-info
-**/.vscode/*
+**/.vscode/*
diff --git a/README.md b/README.md
@@ -28,7 +28,7 @@ pip install .
 ### Usage <br />
 ```python
 from bing_image_downloader import downloader
-downloader.download(query_string, limit=100,  output_dir='dataset', adult_filter_off=True, force_replace=False, timeout=60, print_out=True)
+downloader.download(query_string, limit=100,  output_dir='dataset', adult_filter_off=True, force_replace=False, timeout=60, verbose=True)
 ```
 
 `query_string` : String to be searched.<br />
@@ -37,7 +37,7 @@ downloader.download(query_string, limit=100,  output_dir='dataset', adult_filter
 `adult_filter_off` : (optional, default is True) Enable or disable adult filtering.<br />
 `force_replace` : (optional, default is False) Delete folder if present and start a fresh download.<br />
 `timeout` : (optional, default is 60) timeout for connection in seconds.<br />
-`print_out` : (optional, default is True) disable downloaded message.<br />
+`verbose` : (optional, default is True) Print progress messages for each page and image; set to False to suppress them.<br />
 
 
 
@@ -47,9 +47,14 @@ downloader.download(query_string, limit=100,  output_dir='dataset', adult_filter
 https://pypi.org/project/bing-image-downloader/
 
 
+
+
+<br />
+
 ### Donate
 Please support me if this project was helpful to you.<br />
-[![paypal](https://www.paypalobjects.com/en_US/i/btn/btn_donateCC_LG.gif)](https://paypal.me/GuruPrasadGaurav?locale.x=en_GB)
+
+[<img src="https://www.buymeacoffee.com/assets/img/guidelines/download-assets-sm-1.svg" alt="Show your support" width="180"/>](https://www.buymeacoffee.com/gurugaurav)
 
 
 

diff --git a/bing_image_downloader.egg-info/PKG-INFO b/bing_image_downloader.egg-info/PKG-INFO
diff --git a/bing_image_downloader.egg-info/SOURCES.txt b/bing_image_downloader.egg-info/SOURCES.txt
diff --git a/bing_image_downloader.egg-info/dependency_links.txt b/bing_image_downloader.egg-info/dependency_links.txt
diff --git a/bing_image_downloader.egg-info/top_level.txt b/bing_image_downloader.egg-info/top_level.txt
diff --git a/bing_image_downloader/bing.py b/bing_image_downloader/bing.py
@@ -1,6 +1,4 @@
 from pathlib import Path
-import os
-import sys
 import urllib.request
 import urllib
 import imghdr
@@ -14,34 +12,42 @@
 
 
 class Bing:
-    def __init__(self, query, limit, output_dir, adult, timeout, filters='', print_out):
+    def __init__(self, query, limit, output_dir, adult, timeout,  filters='', verbose=True):
         self.download_count = 0
         self.query = query
         self.output_dir = output_dir
         self.adult = adult
         self.filters = filters
-        self.print_out = print_out
+        self.verbose = verbose
 
         assert type(limit) == int, "limit must be integer"
         self.limit = limit
         assert type(timeout) == int, "timeout must be integer"
         self.timeout = timeout
 
-        self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0'}
+        # self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0'}
         self.page_counter = 0
+        self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) ' 
+      'AppleWebKit/537.11 (KHTML, like Gecko) '
+      'Chrome/23.0.1271.64 Safari/537.11',
+      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+      'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
+      'Accept-Encoding': 'none',
+      'Accept-Language': 'en-US,en;q=0.8',
+      'Connection': 'keep-alive'}
 
     def save_image(self, link, file_path):
         request = urllib.request.Request(link, None, self.headers)
         image = urllib.request.urlopen(request, timeout=self.timeout).read()
         if not imghdr.what(None, image):
             print('[Error]Invalid image, not saving {}\n'.format(link))
-            raise
-        with open(file_path, 'wb') as f:
+            raise ValueError('Invalid image, not saving {}\n'.format(link))
+        with open(str(file_path), 'wb') as f:
             f.write(image)
 
+
     def download_image(self, link):
         self.download_count += 1
-
         # Get the image link
         try:
             path = urllib.parse.urlsplit(link).path
@@ -50,41 +56,46 @@ def download_image(self, link):
             if file_type.lower() not in ["jpe", "jpeg", "jfif", "exif", "tiff", "gif", "bmp", "png", "webp", "jpg"]:
                 file_type = "jpg"
 
-            if self.print_out:
+            if self.verbose:
                 # Download the image
                 print("[%] Downloading Image #{} from {}".format(self.download_count, link))
 
-            self.save_image(link, "{}/{}/{}/".format(os.getcwd(), self.output_dir, self.query) + "Image_{}.{}".format(
-                str(self.download_count), file_type))
-            if self.print_out:
+            self.save_image(link, self.output_dir.joinpath("Image_{}.{}".format(
+                str(self.download_count), file_type)))
+            if self.verbose:
                 print("[%] File Downloaded !\n")
 
         except Exception as e:
             self.download_count -= 1
             print("[!] Issue getting: {}\n[!] Error:: {}".format(link, e))
 
+
     def run(self):
         while self.download_count < self.limit:
-            if self.print_out:
+            if self.verbose:
                 print('\n\n[!!]Indexing page: {}\n'.format(self.page_counter + 1))
             # Parse the page source and download pics
             request_url = 'https://www.bing.com/images/async?q=' + urllib.parse.quote_plus(self.query) \
                           + '&first=' + str(self.page_counter) + '&count=' + str(self.limit) \
-                          + '&adlt=' + self.adult + '&qft=' + self.filters
+                          + '&adlt=' + self.adult + '&qft=' + ('' if self.filters is None else str(self.filters))
             request = urllib.request.Request(request_url, None, headers=self.headers)
             response = urllib.request.urlopen(request)
             html = response.read().decode('utf8')
+            if html ==  "":
+                print("[%] No more images are available")
+                break
             links = re.findall('murl&quot;:&quot;(.*?)&quot;', html)
-            if self.print_out:
+            if self.verbose:
                 print("[%] Indexed {} Images on Page {}.".format(len(links), self.page_counter + 1))
                 print("\n===============================================\n")
 
             for link in links:
                 if self.download_count < self.limit:
                     self.download_image(link)
-                else:
-                    print("\n\n[%] Done. Downloaded {} images.".format(self.download_count))
-                    print("\n===============================================\n")
-                    break
 
             self.page_counter += 1
+        print("\n\n[%] Done. Downloaded {} images.".format(self.download_count))
+        print("===============================================\n")
+        print("Please show your support here")
+        print("https://www.buymeacoffee.com/gurugaurav")
+        print("\n===============================================\n")
diff --git a/bing_image_downloader/downloader.py b/bing_image_downloader/downloader.py
@@ -1,39 +1,45 @@
-import os
+import os, sys
 import shutil
+from pathlib import Path
 
 try:
     from bing import Bing
 except ImportError:  # Python 3
     from .bing import Bing
 
 
-def download(query, limit=100, output_dir='dataset', adult_filter_off=True, force_replace=False, timeout=60, print_out=True):
+def download(query, limit=100, output_dir='dataset', adult_filter_off=True, 
+force_replace=False, timeout=60, verbose=True):
 
     # engine = 'bing'
     if adult_filter_off:
         adult = 'off'
     else:
         adult = 'on'
 
-    cwd = os.getcwd()
-    image_dir = os.path.join(cwd, output_dir, query)
+
+    # cwd = Path.cwd()
+    # output_dir = Path(output_dir)
+    # image_dir = Path.joinpath(cwd, output_dir, query)
+    image_dir = Path(output_dir).joinpath(query).absolute()
 
     if force_replace:
-        if os.path.isdir(image_dir):
+        if Path.isdir(image_dir):
             shutil.rmtree(image_dir)
 
     # check directory and create if necessary
     try:
-        if not os.path.isdir("{}/{}/".format(cwd, output_dir)):
-            os.makedirs("{}/{}/".format(cwd, output_dir))
-    except:
-        pass
-    if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
-        os.makedirs("{}/{}/{}".format(cwd, output_dir, query))
-
-    bing = Bing(query, limit, output_dir, adult, timeout, print_out)
+        if not Path.is_dir(image_dir):
+            Path.mkdir(image_dir, parents=True)
+
+    except Exception as e:
+        print('[Error]Failed to create directory.', e)
+        sys.exit(1)
+
+    print("[%] Downloading Images to {}".format(str(image_dir.absolute())))
+    bing = Bing(query, limit, image_dir, adult, timeout, verbose)
     bing.run()
 
 
 if __name__ == '__main__':
-    download('cat', limit=10, timeout='1')
+    download('dog', output_dir="..\\Users\\cat", limit=10, timeout=1)
diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="bing_image_downloader",
-    version="1.0.4",
+    version="1.1.0",
     author="Guru Prasad Singh",
     author_email="g.gaurav541@gmail.com",
     description="Python library to download bulk images from Bing.com",