Skip to content

Commit

Permalink
Merge pull request #16 from gurugaurav/1.1.0
Browse files Browse the repository at this point in the history
1.1.0
  • Loading branch information
gurugaurav authored May 28, 2021
2 parents 8726c9d + dc3666d commit 296c125
Show file tree
Hide file tree
Showing 9 changed files with 60 additions and 114 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ dataset/*
dataset
dist
image_search.egg-info
**/.vscode/*
**/.vscode/*
11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ pip install .
### Usage <br />
```python
from bing_image_downloader import downloader
downloader.download(query_string, limit=100, output_dir='dataset', adult_filter_off=True, force_replace=False, timeout=60, print_out=True)
downloader.download(query_string, limit=100, output_dir='dataset', adult_filter_off=True, force_replace=False, timeout=60, verbose=True)
```

`query_string` : String to be searched.<br />
Expand All @@ -37,7 +37,7 @@ downloader.download(query_string, limit=100, output_dir='dataset', adult_filter
`adult_filter_off` : (optional, default is True) Enable or disable the adult content filter.<br />
`force_replace` : (optional, default is False) Delete folder if present and start a fresh download.<br />
`timeout` : (optional, default is 60) timeout for connection in seconds.<br />
`print_out` : (optional, default is True) disable downloaded message.<br />
`verbose` : (optional, default is True) Print progress messages while downloading; set to False to silence them.<br />



Expand All @@ -47,9 +47,14 @@ downloader.download(query_string, limit=100, output_dir='dataset', adult_filter
https://pypi.org/project/bing-image-downloader/




<br />

### Donate
Please support me if this project was helpful to you.<br />
[![paypal](https://www.paypalobjects.com/en_US/i/btn/btn_donateCC_LG.gif)](https://paypal.me/GuruPrasadGaurav?locale.x=en_GB)

[<img src="https://www.buymeacoffee.com/assets/img/guidelines/download-assets-sm-1.svg" alt="Show your support" width="180"/>](https://www.buymeacoffee.com/gurugaurav)



Expand Down
64 changes: 0 additions & 64 deletions bing_image_downloader.egg-info/PKG-INFO

This file was deleted.

10 changes: 0 additions & 10 deletions bing_image_downloader.egg-info/SOURCES.txt

This file was deleted.

1 change: 0 additions & 1 deletion bing_image_downloader.egg-info/dependency_links.txt

This file was deleted.

1 change: 0 additions & 1 deletion bing_image_downloader.egg-info/top_level.txt

This file was deleted.

49 changes: 30 additions & 19 deletions bing_image_downloader/bing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
from pathlib import Path
import os
import sys
import urllib.request
import urllib
import imghdr
Expand All @@ -14,34 +12,42 @@


class Bing:
def __init__(self, query, limit, output_dir, adult, timeout, filters='', print_out):
def __init__(self, query, limit, output_dir, adult, timeout, filters='', verbose=True):
self.download_count = 0
self.query = query
self.output_dir = output_dir
self.adult = adult
self.filters = filters
self.print_out = print_out
self.verbose = verbose

assert type(limit) == int, "limit must be integer"
self.limit = limit
assert type(timeout) == int, "timeout must be integer"
self.timeout = timeout

self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0'}
# self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0'}
self.page_counter = 0
self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) '
'AppleWebKit/537.11 (KHTML, like Gecko) '
'Chrome/23.0.1271.64 Safari/537.11',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
'Accept-Encoding': 'none',
'Accept-Language': 'en-US,en;q=0.8',
'Connection': 'keep-alive'}

def save_image(self, link, file_path):
request = urllib.request.Request(link, None, self.headers)
image = urllib.request.urlopen(request, timeout=self.timeout).read()
if not imghdr.what(None, image):
print('[Error]Invalid image, not saving {}\n'.format(link))
raise
with open(file_path, 'wb') as f:
raise ValueError('Invalid image, not saving {}\n'.format(link))
with open(str(file_path), 'wb') as f:
f.write(image)


def download_image(self, link):
self.download_count += 1

# Get the image link
try:
path = urllib.parse.urlsplit(link).path
Expand All @@ -50,41 +56,46 @@ def download_image(self, link):
if file_type.lower() not in ["jpe", "jpeg", "jfif", "exif", "tiff", "gif", "bmp", "png", "webp", "jpg"]:
file_type = "jpg"

if self.print_out:
if self.verbose:
# Download the image
print("[%] Downloading Image #{} from {}".format(self.download_count, link))

self.save_image(link, "{}/{}/{}/".format(os.getcwd(), self.output_dir, self.query) + "Image_{}.{}".format(
str(self.download_count), file_type))
if self.print_out:
self.save_image(link, self.output_dir.joinpath("Image_{}.{}".format(
str(self.download_count), file_type)))
if self.verbose:
print("[%] File Downloaded !\n")

except Exception as e:
self.download_count -= 1
print("[!] Issue getting: {}\n[!] Error:: {}".format(link, e))


def run(self):
while self.download_count < self.limit:
if self.print_out:
if self.verbose:
print('\n\n[!!]Indexing page: {}\n'.format(self.page_counter + 1))
# Parse the page source and download pics
request_url = 'https://www.bing.com/images/async?q=' + urllib.parse.quote_plus(self.query) \
+ '&first=' + str(self.page_counter) + '&count=' + str(self.limit) \
+ '&adlt=' + self.adult + '&qft=' + self.filters
+ '&adlt=' + self.adult + '&qft=' + ('' if self.filters is None else str(self.filters))
request = urllib.request.Request(request_url, None, headers=self.headers)
response = urllib.request.urlopen(request)
html = response.read().decode('utf8')
if html == "":
print("[%] No more images are available")
break
links = re.findall('murl&quot;:&quot;(.*?)&quot;', html)
if self.print_out:
if self.verbose:
print("[%] Indexed {} Images on Page {}.".format(len(links), self.page_counter + 1))
print("\n===============================================\n")

for link in links:
if self.download_count < self.limit:
self.download_image(link)
else:
print("\n\n[%] Done. Downloaded {} images.".format(self.download_count))
print("\n===============================================\n")
break

self.page_counter += 1
print("\n\n[%] Done. Downloaded {} images.".format(self.download_count))
print("===============================================\n")
print("Please show your support here")
print("https://www.buymeacoffee.com/gurugaurav")
print("\n===============================================\n")
34 changes: 20 additions & 14 deletions bing_image_downloader/downloader.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,45 @@
import os
import os, sys
import shutil
from pathlib import Path

try:
from bing import Bing
except ImportError: # Python 3
from .bing import Bing


def download(query, limit=100, output_dir='dataset', adult_filter_off=True, force_replace=False, timeout=60, print_out=True):
def download(query, limit=100, output_dir='dataset', adult_filter_off=True,
force_replace=False, timeout=60, verbose=True):

# engine = 'bing'
if adult_filter_off:
adult = 'off'
else:
adult = 'on'

cwd = os.getcwd()
image_dir = os.path.join(cwd, output_dir, query)

# cwd = Path.cwd()
# output_dir = Path(output_dir)
# image_dir = Path.joinpath(cwd, output_dir, query)
image_dir = Path(output_dir).joinpath(query).absolute()

if force_replace:
if os.path.isdir(image_dir):
if Path.isdir(image_dir):
shutil.rmtree(image_dir)

# check directory and create if necessary
try:
if not os.path.isdir("{}/{}/".format(cwd, output_dir)):
os.makedirs("{}/{}/".format(cwd, output_dir))
except:
pass
if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
os.makedirs("{}/{}/{}".format(cwd, output_dir, query))

bing = Bing(query, limit, output_dir, adult, timeout, print_out)
if not Path.is_dir(image_dir):
Path.mkdir(image_dir, parents=True)

except Exception as e:
print('[Error]Failed to create directory.', e)
sys.exit(1)

print("[%] Downloading Images to {}".format(str(image_dir.absolute())))
bing = Bing(query, limit, image_dir, adult, timeout, verbose)
bing.run()


if __name__ == '__main__':
download('cat', limit=10, timeout='1')
download('dog', output_dir="..\\Users\\cat", limit=10, timeout=1)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="bing_image_downloader",
version="1.0.4",
version="1.1.0",
author="Guru Prasad Singh",
author_email="g.gaurav541@gmail.com",
description="Python library to download bulk images from Bing.com",
Expand Down

0 comments on commit 296c125

Please sign in to comment.