GH-80: Spanish Flair Embeddings #400

Closed
wants to merge 4 commits into from
17 changes: 17 additions & 0 deletions flair/embeddings.py
@@ -571,6 +571,23 @@ def __init__(self, model: str, detach: bool = True, use_cache: bool = False, cac
base_path = 'https://s3.eu-central-1.amazonaws.com/alan-nlp/resources/embeddings-v0.4/lm-eu-large-backward-v0.1.pt'
model = cached_path(base_path, cache_dir=cache_dir)

# Spanish forward fast
elif model.lower() == 'spanish-forward-fast' or model.lower() == 'es-forward-fast':
base_path = 'https://s3.eu-central-1.amazonaws.com/alan-nlp/resources/embeddings-v0.4/language_model_es_forward/lm-es-forward-fast.pt'
model = cached_path(base_path, cache_dir=cache_dir)
# Spanish backward fast
elif model.lower() == 'spanish-backward-fast' or model.lower() == 'es-backward-fast':
base_path = 'https://s3.eu-central-1.amazonaws.com/alan-nlp/resources/embeddings-v0.4/language_model_es_backward/lm-es-backward-fast.pt'
model = cached_path(base_path, cache_dir=cache_dir)

# Spanish forward
elif model.lower() == 'spanish-forward' or model.lower() == 'es-forward':
base_path = 'https://s3.eu-central-1.amazonaws.com/alan-nlp/resources/embeddings-v0.4/language_model_es_forward_long/lm-es-forward.pt'
model = cached_path(base_path, cache_dir=cache_dir)
# Spanish backward
elif model.lower() == 'spanish-backward' or model.lower() == 'es-backward':
base_path = 'https://s3.eu-central-1.amazonaws.com/alan-nlp/resources/embeddings-v0.4/language_model_es_backward_long/lm-es-backward.pt'
model = cached_path(base_path, cache_dir=cache_dir)

elif not Path(model).exists():
raise ValueError(f'The given model "{model}" is not available or is not a valid path.')
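For reference, a minimal usage sketch of the new aliases. It assumes the constructor above belongs to flair's CharLMEmbeddings class (the contextual string embedding class of this era, later renamed FlairEmbeddings); the sentence text is just an example:

```python
from flair.data import Sentence
from flair.embeddings import CharLMEmbeddings

# the model name is lower-cased and matched against the aliases above;
# the checkpoint is then downloaded from S3 and cached under ~/.flair
embedding = CharLMEmbeddings('spanish-forward')

# embed a Spanish sentence; each token receives a contextual string embedding
sentence = Sentence('Madrid es la capital de España .')
embedding.embed(sentence)

for token in sentence:
    print(token.text, token.get_embedding().size())
```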
15 changes: 15 additions & 0 deletions flair/file_utils.py
@@ -11,6 +11,7 @@
import re
from urllib.parse import urlparse

import mmap
import requests

# from allennlp.common.tqdm import Tqdm
@@ -22,6 +23,20 @@
CACHE_ROOT = os.path.expanduser(os.path.join('~', '.flair'))


def load_big_file(f):
Collaborator:
This code belongs to PR #397. I guess you added it incidentally?

Collaborator (Author):
oh oops yes

Collaborator (Author):
I'll correct and do another PR

Great @alanakbik @tabergma !
Thanks a lot for adding it!
Glad to contribute a little bit after so much help from you guys! Thank you!

"""
Workaround for loading a big pickle file. Files over 2GB cause pickle errors on certain Mac and Windows distributions.
:param f: path of the file to load
:return: a memory-mapped view of the file contents
"""
logger.info(f'loading big file {f}')
with open(f, 'r+b') as f_in:
# mmap seems to be much more memory efficient
bf = mmap.mmap(f_in.fileno(), 0)
f_in.close()
return bf


def url_to_filename(url: str, etag: str = None) -> str:
"""
Converts a url into a filename in a reversible way.
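An mmap object is file-like (it supports read, readline, and seek), so it can be handed straight to torch.load. A minimal sketch of the intended use, mirroring the call sites changed below; the model path is hypothetical:

```python
import torch

from flair.file_utils import load_big_file

# memory-map the checkpoint instead of reading it into RAM in one piece,
# sidestepping pickle's 2GB limit on the affected Mac/Windows builds
f = load_big_file('/path/to/big-model.pt')  # hypothetical path
state = torch.load(f, map_location={'cuda:0': 'cpu'})
```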
8 changes: 6 additions & 2 deletions flair/models/sequence_tagger_model.py
@@ -238,10 +238,14 @@ def _load_state(cls, model_file: Union[str, Path]):
# https://docs.python.org/3/library/warnings.html#temporarily-suppressing-warnings
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
+ # load_big_file is a workaround by https://github.com/highway11git to load models on some Mac/Windows setups
+ # see https://github.com/zalandoresearch/flair/issues/351
if torch.cuda.is_available():
- state = torch.load(str(model_file))
+ f = flair.file_utils.load_big_file(str(model_file))
+ state = torch.load(f)
else:
- state = torch.load(str(model_file), map_location={'cuda:0': 'cpu'})
+ f = flair.file_utils.load_big_file(str(model_file))
+ state = torch.load(f, map_location={'cuda:0': 'cpu'})
return state

def forward_loss(self, sentences: Union[List[Sentence], Sentence], sort=True) -> torch.tensor:
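With this change, loading a pre-trained tagger goes through load_big_file transparently. A sketch, assuming the SequenceTagger.load entry point that wraps _load_state in this version of flair:

```python
from flair.data import Sentence
from flair.models import SequenceTagger

# _load_state above now memory-maps the checkpoint before unpickling it
tagger = SequenceTagger.load('ner')

sentence = Sentence('George Washington went to Washington .')
tagger.predict(sentence)
print(sentence.to_tagged_string())
```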
8 changes: 6 additions & 2 deletions flair/models/text_classification_model.py
@@ -140,10 +140,14 @@ def _load_state(cls, model_file: Union[str, Path]):
# https://docs.python.org/3/library/warnings.html#temporarily-suppressing-warnings
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
+ # load_big_file is a workaround by https://github.com/highway11git to load models on some Mac/Windows setups
+ # see https://github.com/zalandoresearch/flair/issues/351
if torch.cuda.is_available():
- state = torch.load(str(model_file))
+ f = flair.file_utils.load_big_file(str(model_file))
+ state = torch.load(f)
else:
- state = torch.load(str(model_file), map_location={'cuda:0': 'cpu'})
+ f = flair.file_utils.load_big_file(str(model_file))
+ state = torch.load(f, map_location={'cuda:0': 'cpu'})
return state

def forward_loss(self, sentences: Union[List[Sentence], Sentence]) -> torch.tensor:
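The classifier side follows the same pattern. A sketch of the CPU-only branch, assuming the TextClassifier.load_from_file entry point of this era and a hypothetical local model path; map_location remaps tensors saved on 'cuda:0' to the CPU:

```python
from flair.data import Sentence
from flair.models import TextClassifier

# on a machine without CUDA, _load_state takes the map_location branch
classifier = TextClassifier.load_from_file('/path/to/classifier.pt')  # hypothetical path

sentence = Sentence('El servicio fue excelente .')
classifier.predict(sentence)
print(sentence.labels)
```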
4 changes: 4 additions & 0 deletions resources/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md
@@ -72,6 +72,10 @@ Currently, the following contextual string embeddings are provided (more coming)
| 'portuguese-backward' | Portuguese | Added by [@ericlief](https://github.com/ericlief/language_models): Backward LM embeddings |
| 'basque-forward' | Basque | Added by [@stefan-it](https://github.com/stefan-it/flair-lms): Forward LM embeddings |
| 'basque-backward' | Basque | Added by [@stefan-it](https://github.com/stefan-it/flair-lms): Backward LM embeddings |
| 'spanish-forward' | Spanish | Added by [@iamyihwa](https://github.com/zalandoresearch/flair/issues/80): Forward LM embeddings over Wikipedia |
| 'spanish-backward' | Spanish | Added by [@iamyihwa](https://github.com/zalandoresearch/flair/issues/80): Backward LM embeddings over Wikipedia |
| 'spanish-forward-fast' | Spanish | Added by [@iamyihwa](https://github.com/zalandoresearch/flair/issues/80): CPU-friendly forward LM embeddings over Wikipedia |
| 'spanish-backward-fast' | Spanish | Added by [@iamyihwa](https://github.com/zalandoresearch/flair/issues/80): CPU-friendly backward LM embeddings over Wikipedia |

So, if you want to load embeddings from the English news backward LM model, instantiate the method as follows:

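The code snippet that the diff view truncates here presumably resembles this minimal sketch (again assuming the CharLMEmbeddings class name of this era):

```python
from flair.embeddings import CharLMEmbeddings

# init embedding from the English news backward LM
charlm_embedding_backward = CharLMEmbeddings('news-backward')
```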