Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🌍 Add google-earth filetype kml and update contributing #11

Merged
merged 8 commits into from
Oct 30, 2021
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,6 @@ The steps are therefore:
- Create your pull request


Notes: This link `https://developer.mozilla.org/fr/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types` can help for adding a new extension
Notes: These links can help when adding a new extension
- `https://developer.mozilla.org/fr/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types`
- `https://support.google.com/webmasters/answer/35287`
37 changes: 37 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,43 @@ http://www.justice.gouv.fr/art_pix/Stat_RSJ_12.7_Civil_Les_tribunaux_de_commerce
https://www.insee.fr/fr/metadonnees/source/fichier/Precision-principaux-indicateurs-crise-sanitaire-2020.ods
```

### KML results
```python
from websearch import WebSearch
web = WebSearch('Madagascar')
maps = web.kml
for map in maps[:3]:
print(map)
```

```
[RESULTS]
http://www.hydrosciences.fr/sierem/kmz_files/MGPLGRA.kml
https://www.ngoaidmap.org/downloads?doc=kml&name=association-intercooperation-madagascar-aim_projects&partners%5B%5D=6160&sectors%5B%5D=1&status=active
https://ngoaidmap.org/downloads?doc=kml&name=nemp-madagascar-cyclone-enawo-response_projects&projects%5B%5D=20655&status=active
```

### CUSTOM search

For extensions that are not listed above, use the `custom_search` method

The second argument (the MIME type matching the extension) is explained [here](https://github.com/WebSearh/pull/4)

```python
from websearch import WebSearch
web = WebSearch('Biologie')
ps_documents = web.custom_search('ps', 'application/postscript')
for doc in ps_documents[:3]:
print(doc)
```

```
[RESULTS]
http://irma.math.unistra.fr/~fbertran/Master1_2020_2/L3Court.ps
http://jfla.inria.fr/2002/actes/10-michel.ps
https://www.crstra.dz/telechargement/pnr/ps/environnement/fadel-djamel.ps
```

## LICENSE

MIT License
Expand Down
16 changes: 16 additions & 0 deletions tests/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,22 @@ def test8_ods(self):
for doc in documents:
self.assertTrue(doc.startswith('http'))

def test9_kml(self):
    """Check that a KML search for 'Madagascar' yields an HTTP(S) link."""
    # Only the first hit is inspected.
    results = websearch.WebSearch('Madagascar').kml[:1]
    # At least one result must come back.
    self.assertTrue(len(results))
    # Every returned entry must look like a URL.
    self.assertTrue(all(link.startswith('http') for link in results))

def test10_custom(self):
    """Check that `custom_search` for PostScript files yields an HTTP(S) link."""
    # Only the first hit is inspected.
    documents = websearch.WebSearch('Biologie').custom_search('ps', 'application/postscript')[:1]
    # At least one result must come back.
    self.assertTrue(len(documents))
    # Every returned entry must look like a URL.
    for doc in documents:
        # Assert on the loop variable: the previous code called
        # `map.startswith`, and `map` is the builtin in this scope, so the
        # test crashed with AttributeError instead of checking the link.
        self.assertTrue(doc.startswith('http'))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why you use map ? you mean doc, that's why test fail for custom_search

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry for the error


if __name__ == '__main__':
    # Discover and run every TestCase defined in this module.
    # A bare `unittest.TestCase().run()` does not collect the module's tests:
    # it looks up a `runTest` method that does not exist on the base class.
    unittest.main()
55 changes: 47 additions & 8 deletions websearch/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def __init__(self, query, verif=True, **kwargs):
# utiliser pour l'optimisation
self.__data = {}

def __verif_content(self, urls, ext):
def __verif_content(self, urls, mimetype):
'''
Verification du bon format du lien
argument `ext` peut être consulté ici:
Expand All @@ -52,7 +52,7 @@ def __verif_content(self, urls, ext):
print(err)
continue
# Verfier si le lien renvoie bien le format voulu.
if rq.get('content-type') == f'application/{ext}':
if rq.get('content-type') == f'{mimetype}':
new_urls.append(url)
# renvoyer les urls verfiés.
return new_urls
Expand Down Expand Up @@ -124,7 +124,7 @@ def pdf(self):
return self.__data['pdf'][1]
tmp = self.query
self.query = 'filetype:pdf ' + self.query
result = self.__verif_content(self.pages, 'pdf')
result = self.__verif_content(self.pages, 'application/pdf')
self.query = tmp
# Sauvegarde des resultats pour optimiser la prochaine même appel.
self.__data['pdf'] = (self.query, result)
Expand All @@ -141,7 +141,7 @@ def docx(self):
return self.__data['docx'][1]
tmp = self.query
self.query = 'filetype:docx ' + self.query
result = self.__verif_content(self.pages, "vnd.openxmlformats-officedocument.wordprocessingml.document")
result = self.__verif_content(self.pages, "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
self.query = tmp
# Sauvegarde des resultats pour optimiser la prochaine même appel.
self.__data['docx'] = (self.query, result)
Expand All @@ -157,7 +157,7 @@ def xlsx(self):
return self.__data['xlsx'][1]
tmp = self.query
self.query = 'filetype:xlsx ' + self.query
result = self.__verif_content(self.pages, "vnd.openxmlformats-officedocument.spreadsheetml.sheet")
result = self.__verif_content(self.pages, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
self.query = tmp
# Sauvegarde des resultats pour optimiser la prochaine même appel.
self.__data['xlsx'] = (self.query, result)
Expand All @@ -174,7 +174,7 @@ def pptx(self):
return self.__data['pptx'][1]
tmp = self.query
self.query = 'filetype:pptx ' + self.query
result = self.__verif_content(self.pages, "vnd.openxmlformats-officedocument.presentationml.presentation")
result = self.__verif_content(self.pages, "application/vnd.openxmlformats-officedocument.presentationml.presentation")
self.query = tmp
# Sauvegarde des resultats pour optimiser la prochaine même appel.
self.__data['pptx'] = (self.query, result)
Expand All @@ -191,7 +191,7 @@ def odt(self):
return self.__data['odt'][1]
tmp = self.query
self.query = 'filetype:odt ' + self.query
result = self.__verif_content(self.pages, "vnd.oasis.opendocument.text")
result = self.__verif_content(self.pages, "application/vnd.oasis.opendocument.text")
self.query = tmp
# Sauvegarde des resultats pour optimiser la prochaine même appel.
self.__data['odt'] = (self.query, result)
Expand All @@ -208,9 +208,48 @@ def ods(self):
return self.__data['ods'][1]
tmp = self.query
self.query = 'filetype:ods ' + self.query
result = self.__verif_content(self.pages, "vnd.oasis.opendocument.spreadsheet")
result = self.__verif_content(self.pages, "application/vnd.oasis.opendocument.spreadsheet")
self.query = tmp
# Sauvegarde des resultats pour optimiser la prochaine même appel.
self.__data['ods'] = (self.query, result)
return result

@property
def kml(self):
    '''
    Return links to geographic project files for Google Earth (KML format).

    The search is run with a `filetype:kml` prefix added to the current
    query, and the resulting pages are filtered by `__verif_content`
    against the `application/vnd.google-earth.kml+xml` content type.
    Results are cached per query, so asking again for the same query
    returns the stored list without searching again.
    '''
    # Serve from the cache when the stored entry was built for this query.
    cached = self.__data.get('kml')
    if cached and cached[0] == self.query:
        return cached[1]
    original_query = self.query
    self.query = 'filetype:kml ' + original_query
    try:
        # `self.pages` is read after the query has been prefixed.
        result = self.__verif_content(self.pages, "application/vnd.google-earth.kml+xml")
    finally:
        # Always restore the caller's query, even when fetching or
        # verification raises; otherwise the prefix would leak into
        # every later search made through this instance.
        self.query = original_query
    # Store the results so the next identical call is served from the cache.
    self.__data['kml'] = (original_query, result)
    return result

def custom_search(self, extension='pdf', mimetype='application/pdf'):
    '''
    Return links to files matching an arbitrary extension and MIME type.

    The search is run with a `filetype:<extension>` prefix added to the
    current query, and the resulting pages are filtered by
    `__verif_content` against `mimetype`.

    Keyword arguments:
    extension -- The file's extension (default 'pdf')
    mimetype -- The MIME type that matches the extension
                (default 'application/pdf')

    Results are cached per (extension, mimetype) pair and per query.
    '''
    # Key the cache on both arguments: keying on the extension alone would
    # hand back results filtered with a different MIME type.
    cache_key = (extension, mimetype)
    cached = self.__data.get(cache_key)
    if cached and cached[0] == self.query:
        return cached[1]
    original_query = self.query
    self.query = f'filetype:{extension} {original_query}'
    try:
        # `self.pages` is read after the query has been prefixed.
        result = self.__verif_content(self.pages, mimetype)
    finally:
        # Always restore the caller's query, even when fetching or
        # verification raises; otherwise the prefix would leak into
        # every later search made through this instance.
        self.query = original_query
    # Store the results so the next identical call is served from the cache.
    self.__data[cache_key] = (original_query, result)
    return result