From 1c652bafddfccb1ed4b51589e86416365b6b3940 Mon Sep 17 00:00:00 2001 From: aakbik Date: Wed, 16 Jan 2019 11:44:33 +0100 Subject: [PATCH 1/4] GH-351: added big file loader util --- flair/file_utils.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/flair/file_utils.py b/flair/file_utils.py index 6395495d4..ca163af5e 100644 --- a/flair/file_utils.py +++ b/flair/file_utils.py @@ -11,6 +11,7 @@ import re from urllib.parse import urlparse +import mmap import requests # from allennlp.common.tqdm import Tqdm @@ -22,6 +23,20 @@ CACHE_ROOT = os.path.expanduser(os.path.join('~', '.flair')) +def load_big_file(f): + """ + Workaround for loading a big pickle file. Files over 2GB cause pickle errors on certain Mac and Windows distributions. + :param f: path to the file to load + :return: memory-mapped file object + """ + logger.info(f'loading big file {f}') + with open(f, 'r+b') as f_in: + # mmap seems to be much more memory efficient + bf = mmap.mmap(f_in.fileno(), 0) + f_in.close() + return bf + + def url_to_filename(url: str, etag: str = None) -> str: """ Converts a url into a filename in a reversible way. 
From 3a2b11a82770ab596596a7021806c4cb64d834b4 Mon Sep 17 00:00:00 2001 From: aakbik Date: Wed, 16 Jan 2019 14:01:12 +0100 Subject: [PATCH 2/4] GH-351: added big file deserialization --- flair/models/sequence_tagger_model.py | 8 ++++++-- flair/models/text_classification_model.py | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/flair/models/sequence_tagger_model.py b/flair/models/sequence_tagger_model.py index 8a45b8454..730b90890 100644 --- a/flair/models/sequence_tagger_model.py +++ b/flair/models/sequence_tagger_model.py @@ -238,10 +238,14 @@ def _load_state(cls, model_file: Union[str, Path]): # https://docs.python.org/3/library/warnings.html#temporarily-suppressing-warnings with warnings.catch_warnings(): warnings.filterwarnings("ignore") + # load_big_file is a workaround by https://github.com/highway11git to load models on some Mac/Windows setups + # see https://github.com/zalandoresearch/flair/issues/351 if torch.cuda.is_available(): - state = torch.load(str(model_file)) + f = flair.file_utils.load_big_file(str(model_file)) + state = torch.load(f) else: - state = torch.load(str(model_file), map_location={'cuda:0': 'cpu'}) + f = flair.file_utils.load_big_file(str(model_file)) + state = torch.load(f, map_location={'cuda:0': 'cpu'}) return state def forward_loss(self, sentences: Union[List[Sentence], Sentence], sort=True) -> torch.tensor: diff --git a/flair/models/text_classification_model.py b/flair/models/text_classification_model.py index a288bb509..de733a02b 100644 --- a/flair/models/text_classification_model.py +++ b/flair/models/text_classification_model.py @@ -140,10 +140,14 @@ def _load_state(cls, model_file: Union[str, Path]): # https://docs.python.org/3/library/warnings.html#temporarily-suppressing-warnings with warnings.catch_warnings(): warnings.filterwarnings("ignore") + # load_big_file is a workaround by https://github.com/highway11git to load models on some Mac/Windows setups + # see 
https://github.com/zalandoresearch/flair/issues/351 if torch.cuda.is_available(): - state = torch.load(str(model_file)) + f = flair.file_utils.load_big_file(str(model_file)) + state = torch.load(f) else: - state = torch.load(str(model_file), map_location={'cuda:0': 'cpu'}) + f = flair.file_utils.load_big_file(str(model_file)) + state = torch.load(f, map_location={'cuda:0': 'cpu'}) return state def forward_loss(self, sentences: Union[List[Sentence], Sentence]) -> torch.tensor: From 8c27a344c251c4415cffee84f5e17aaf2a6f7568 Mon Sep 17 00:00:00 2001 From: aakbik Date: Thu, 17 Jan 2019 11:41:05 +0100 Subject: [PATCH 3/4] GH-80: added Spanish LM embeddings trained by @iamyihwa --- flair/embeddings.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/flair/embeddings.py b/flair/embeddings.py index 7a3904442..709970df1 100644 --- a/flair/embeddings.py +++ b/flair/embeddings.py @@ -571,6 +571,23 @@ def __init__(self, model: str, detach: bool = True, use_cache: bool = False, cac base_path = 'https://s3.eu-central-1.amazonaws.com/alan-nlp/resources/embeddings-v0.4/lm-eu-large-backward-v0.1.pt' model = cached_path(base_path, cache_dir=cache_dir) + # Spanish forward fast + elif model.lower() == 'spanish-forward-fast' or model.lower() == 'es-forward-fast': + base_path = 'https://s3.eu-central-1.amazonaws.com/alan-nlp/resources/embeddings-v0.4/language_model_es_forward/lm-es-forward-fast.pt' + model = cached_path(base_path, cache_dir=cache_dir) + # Spanish backward fast + elif model.lower() == 'spanish-backward-fast' or model.lower() == 'es-backward-fast': + base_path = 'https://s3.eu-central-1.amazonaws.com/alan-nlp/resources/embeddings-v0.4/language_model_es_backward/lm-es-backward-fast.pt' + model = cached_path(base_path, cache_dir=cache_dir) + + # Spanish forward + elif model.lower() == 'spanish-forward' or model.lower() == 'es-forward': + base_path = 
'https://s3.eu-central-1.amazonaws.com/alan-nlp/resources/embeddings-v0.4/language_model_es_forward_long/lm-es-forward.pt' + model = cached_path(base_path, cache_dir=cache_dir) + # Spanish backward + elif model.lower() == 'spanish-backward' or model.lower() == 'es-backward': + base_path = 'https://s3.eu-central-1.amazonaws.com/alan-nlp/resources/embeddings-v0.4/language_model_es_backward_long/lm-es-backward.pt' + model = cached_path(base_path, cache_dir=cache_dir) elif not Path(model).exists(): raise ValueError(f'The given model "{model}" is not available or is not a valid path.') From ce2caf046d8e3cdedf19f57237c4fbda4033da8b Mon Sep 17 00:00:00 2001 From: aakbik Date: Thu, 17 Jan 2019 11:54:12 +0100 Subject: [PATCH 4/4] GH-80: added Spanish LM embeddings trained by @iamyihwa --- resources/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/resources/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md b/resources/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md index db8429c3f..4af76df04 100644 --- a/resources/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md +++ b/resources/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md @@ -72,6 +72,10 @@ Currently, the following contextual string embeddings are provided (more coming) | 'portuguese-backward' | Portuguese | Added by [@ericlief](https://github.com/ericlief/language_models): Backward LM embeddings | | 'basque-forward' | Basque | Added by [@stefan-it](https://github.com/stefan-it/flair-lms): Forward LM embeddings | | 'basque-backward' | Basque | Added by [@stefan-it](https://github.com/stefan-it/flair-lms): Backward LM embeddings | +| 'spanish-forward' | Spanish | Added by [@iamyihwa](https://github.com/zalandoresearch/flair/issues/80): Forward LM embeddings over Wikipedia | +| 'spanish-backward' | Spanish | Added by [@iamyihwa](https://github.com/zalandoresearch/flair/issues/80): Backward LM embeddings over Wikipedia | +| 'spanish-forward-fast' | Spanish | Added by 
[@iamyihwa](https://github.com/zalandoresearch/flair/issues/80): CPU-friendly forward LM embeddings over Wikipedia | +| 'spanish-backward-fast' | Spanish | Added by [@iamyihwa](https://github.com/zalandoresearch/flair/issues/80): CPU-friendly backward LM embeddings over Wikipedia | So, if you want to load embeddings from the English news backward LM model, instantiate the method as follows: