diff --git a/docs/notebooks/soft_cosine_benchmark.ipynb b/docs/notebooks/soft_cosine_benchmark.ipynb index 7b4bfbe317..9421b84c17 100644 --- a/docs/notebooks/soft_cosine_benchmark.ipynb +++ b/docs/notebooks/soft_cosine_benchmark.ipynb @@ -18,7 +18,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "55a687c2d30bb9f4a03f4b73687ccc7803ca9993\r\n" + "d429fedf094e00c4bb5c27589d5befb53b2e4b13\r\n" ] } ], @@ -169,13 +169,13 @@ "outputs": [], "source": [ "def benchmark(configuration):\n", - " dictionary, nonzero_limit, symmetric, repetition = configuration\n", + " dictionary, nonzero_limit, symmetric, positive_definite, repetition = configuration\n", " index = UniformTermSimilarityIndex(dictionary)\n", " \n", " start_time = time()\n", " matrix = SparseTermSimilarityMatrix(\n", " index, dictionary, nonzero_limit=nonzero_limit, symmetric=symmetric,\n", - " dtype=np.float16).matrix\n", + " positive_definite=positive_definite, dtype=np.float16).matrix\n", " end_time = time()\n", " \n", " duration = end_time - start_time\n", @@ -185,6 +185,7 @@ " \"matrix_nonzero\": matrix.nnz,\n", " \"repetition\": repetition,\n", " \"symmetric\": symmetric,\n", + " \"positive_definite\": positive_definite,\n", " \"duration\": duration, }" ] }, @@ -196,7 +197,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "16b3428e46494df9b65eedd34a35b9a2", + "model_id": "4aef903a70e24247ad3c889237ed4c48", "version_major": 2, "version_minor": 0 }, @@ -225,9 +226,10 @@ "dictionaries.append(full_dictionary)\n", "nonzero_limits = [1, 10, 100]\n", "symmetry = (True, False)\n", + "positive_definiteness = (True, False)\n", "repetitions = range(10)\n", "\n", - "configurations = product(dictionaries, nonzero_limits, symmetry, repetitions)\n", + "configurations = product(dictionaries, nonzero_limits, symmetry, positive_definiteness, repetitions)\n", "results = benchmark_results(benchmark, configurations, \"matrix_speed.director_results\")" ] }, @@ -235,9 +237,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The following tables show how long it takes to construct a term similarity matrix (the **duration** column), how many nonzero elements there are in the matrix (the **matrix_nonzero** column) and the mean term similarity consumption speed (the **consumption_speed** column) as we vary the dictionary size (the **dictionary_size** column) the maximum number of nonzero elements outside the diagonal in every column of the matrix (the **nonzero_limit** column), and the matrix symmetry constraint (the **symmetric** column). Ten independendent measurements were taken. The top table shows the mean values and the bottom table shows the standard deviations.\n", + "The following tables show how long it takes to construct a term similarity matrix (the **duration** column), how many nonzero elements there are in the matrix (the **matrix_nonzero** column) and the mean term similarity consumption speed (the **consumption_speed** column) as we vary the dictionary size (the **dictionary_size** column) the maximum number of nonzero elements outside the diagonal in every column of the matrix (the **nonzero_limit** column), the matrix symmetry constraint (the **symmetric** column), and the matrix positive definiteness constraing (the **positive_definite** column). Ten independendent measurements were taken. 
The top table shows the mean values and the bottom table shows the standard deviations.\n", "\n", - "We can see that the symmetry constraint severely limits the number of nonzero elements in the resulting matrix. This in turn increases the consumption speed, since we end up throwing away most of the elements that we consume. The effects of the dictionary size on the mean term similarity consumption speed are minor." + "We can see that the symmetry and positive definiteness constraints severely limit the number of nonzero elements in the resulting matrix. This in turn increases the consumption speed, since we end up throwing away most of the elements that we consume. The effects of the dictionary size on the mean term similarity consumption speed are minor to none." ] }, { @@ -248,7 +250,7 @@ "source": [ "df = pd.DataFrame(results)\n", "df[\"consumption_speed\"] = df.dictionary_size * df.nonzero_limit / df.duration\n", - "df = df.groupby([\"dictionary_size\", \"nonzero_limit\", \"symmetric\"])\n", + "df = df.groupby([\"dictionary_size\", \"nonzero_limit\", \"symmetric\", \"positive_definite\"])\n", "\n", "def display(df):\n", " df[\"duration\"] = [timedelta(0, duration) for duration in df[\"duration\"]]\n", @@ -285,6 +287,7 @@ " \n", " \n", " \n", + " \n", " duration\n", " matrix_nonzero\n", " consumption_speed\n", @@ -293,6 +296,7 @@ " dictionary_size\n", " nonzero_limit\n", " symmetric\n", + " positive_definite\n", " \n", " \n", " \n", @@ -300,119 +304,254 @@ " \n", " \n", " \n", - " 10000\n", - " 1\n", + " 10000\n", + " 1\n", + " False\n", " False\n", - " 00:00:00.286091\n", + " 00:00:00.435533\n", + " 20000\n", + " 22.96 Kword pairs / s\n", + " \n", + " \n", + " True\n", + " 00:00:00.492606\n", " 20000\n", - " 34.95 Kword pairs / s\n", + " 20.30 Kword pairs / s\n", + " \n", + " \n", + " True\n", + " False\n", + " 00:00:00.185563\n", + " 10002\n", + " 53.90 Kword pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.166287\n", + " 00:00:00.240471\n", " 10002\n", - " 60.15 Kword pairs / s\n", + " 41.59 Kword pairs / s\n", " \n", " \n", - " 10\n", + " 10\n", + " False\n", " False\n", - " 00:00:01.573833\n", + " 00:00:02.687836\n", " 110000\n", - " 63.54 Kword pairs / s\n", + " 37.21 Kword pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.640328\n", + " 00:00:00.615492\n", + " 20000\n", + " 162.49 Kword pairs / s\n", + " \n", + " \n", + " True\n", + " False\n", + " 00:00:00.501188\n", " 10118\n", - " 156.20 Kword pairs / s\n", + " 199.53 Kword pairs / s\n", " \n", " \n", - " 100\n", + " True\n", + " 00:00:01.380586\n", + " 10010\n", + " 72.44 Kword pairs / s\n", + " \n", + " \n", + " 100\n", + " False\n", " False\n", - " 00:00:14.662728\n", + " 00:00:25.262807\n", " 1010000\n", - " 68.20 Kword pairs / s\n", + " 39.58 Kword pairs / s\n", " \n", " \n", " True\n", - " 00:00:05.233251\n", + " 00:00:01.132524\n", + " 20000\n", + " 883.02 Kword pairs / s\n", + " \n", + " \n", + " True\n", + " False\n", + " 00:00:03.595666\n", " 20198\n", - " 191.09 Kword pairs / s\n", + " 278.13 Kword pairs / s\n", " \n", " \n", - " 2010000\n", - " 1\n", + " True\n", + " 00:00:11.818912\n", + " 10100\n", + " 84.61 Kword pairs / s\n", + " \n", + " \n", + " 2010000\n", + " 1\n", + " False\n", " False\n", - " 00:01:02.938585\n", + " 00:01:31.786585\n", " 4020000\n", - " 31.94 Kword pairs / s\n", + " 21.90 Kword pairs / s\n", " \n", " \n", " True\n", - " 00:00:35.977733\n", + " 00:01:40.954580\n", + " 4020000\n", + " 19.91 Kword pairs / s\n", + " \n", + " \n", + " True\n", + " False\n", + " 00:00:39.050064\n", " 
2010002\n", - " 55.87 Kword pairs / s\n", + " 51.48 Kword pairs / s\n", " \n", " \n", - " 10\n", + " True\n", + " 00:00:49.238437\n", + " 2010002\n", + " 40.82 Kword pairs / s\n", + " \n", + " \n", + " 10\n", + " False\n", " False\n", - " 00:05:53.410117\n", + " 00:09:35.470373\n", " 22110000\n", - " 56.88 Kword pairs / s\n", + " 34.93 Kword pairs / s\n", " \n", " \n", " True\n", - " 00:02:07.940066\n", + " 00:02:02.920334\n", + " 4020000\n", + " 163.52 Kword pairs / s\n", + " \n", + " \n", + " True\n", + " False\n", + " 00:01:39.576693\n", " 2010118\n", - " 157.11 Kword pairs / s\n", + " 201.88 Kword pairs / s\n", " \n", " \n", - " 100\n", + " True\n", + " 00:04:35.646501\n", + " 2010010\n", + " 72.92 Kword pairs / s\n", + " \n", + " \n", + " 100\n", + " False\n", " False\n", - " 01:06:08.946648\n", + " 01:42:01.747568\n", " 203010000\n", - " 50.65 Kword pairs / s\n", + " 32.88 Kword pairs / s\n", " \n", " \n", " True\n", - " 00:17:06.064969\n", + " 00:03:36.420778\n", + " 4020000\n", + " 928.75 Kword pairs / s\n", + " \n", + " \n", + " True\n", + " False\n", + " 00:10:58.434060\n", " 2020198\n", - " 195.91 Kword pairs / s\n", + " 305.30 Kword pairs / s\n", + " \n", + " \n", + " True\n", + " 00:39:40.319479\n", + " 2010100\n", + " 84.44 Kword pairs / s\n", " \n", " \n", "\n", "" ], "text/plain": [ - " duration matrix_nonzero \\\n", - "dictionary_size nonzero_limit symmetric \n", - "10000 1 False 00:00:00.286091 20000 \n", - " True 00:00:00.166287 10002 \n", - " 10 False 00:00:01.573833 110000 \n", - " True 00:00:00.640328 10118 \n", - " 100 False 00:00:14.662728 1010000 \n", - " True 00:00:05.233251 20198 \n", - "2010000 1 False 00:01:02.938585 4020000 \n", - " True 00:00:35.977733 2010002 \n", - " 10 False 00:05:53.410117 22110000 \n", - " True 00:02:07.940066 2010118 \n", - " 100 False 01:06:08.946648 203010000 \n", - " True 00:17:06.064969 2020198 \n", + " duration \\\n", + "dictionary_size nonzero_limit symmetric positive_definite \n", + "10000 1 False False 00:00:00.435533 \n", + " True 00:00:00.492606 \n", + " True False 00:00:00.185563 \n", + " True 00:00:00.240471 \n", + " 10 False False 00:00:02.687836 \n", + " True 00:00:00.615492 \n", + " True False 00:00:00.501188 \n", + " True 00:00:01.380586 \n", + " 100 False False 00:00:25.262807 \n", + " True 00:00:01.132524 \n", + " True False 00:00:03.595666 \n", + " True 00:00:11.818912 \n", + "2010000 1 False False 00:01:31.786585 \n", + " True 00:01:40.954580 \n", + " True False 00:00:39.050064 \n", + " True 00:00:49.238437 \n", + " 10 False False 00:09:35.470373 \n", + " True 00:02:02.920334 \n", + " True False 00:01:39.576693 \n", + " True 00:04:35.646501 \n", + " 100 False False 01:42:01.747568 \n", + " True 00:03:36.420778 \n", + " True False 00:10:58.434060 \n", + " True 00:39:40.319479 \n", + "\n", + " matrix_nonzero \\\n", + "dictionary_size nonzero_limit symmetric positive_definite \n", + "10000 1 False False 20000 \n", + " True 20000 \n", + " True False 10002 \n", + " True 10002 \n", + " 10 False False 110000 \n", + " True 20000 \n", + " True False 10118 \n", + " True 10010 \n", + " 100 False False 1010000 \n", + " True 20000 \n", + " True False 20198 \n", + " True 10100 \n", + "2010000 1 False False 4020000 \n", + " True 4020000 \n", + " True False 2010002 \n", + " True 2010002 \n", + " 10 False False 22110000 \n", + " True 4020000 \n", + " True False 2010118 \n", + " True 2010010 \n", + " 100 False False 203010000 \n", + " True 4020000 \n", + " True False 2020198 \n", + " True 2010100 \n", "\n", - " consumption_speed \n", 
- "dictionary_size nonzero_limit symmetric \n", - "10000 1 False 34.95 Kword pairs / s \n", - " True 60.15 Kword pairs / s \n", - " 10 False 63.54 Kword pairs / s \n", - " True 156.20 Kword pairs / s \n", - " 100 False 68.20 Kword pairs / s \n", - " True 191.09 Kword pairs / s \n", - "2010000 1 False 31.94 Kword pairs / s \n", - " True 55.87 Kword pairs / s \n", - " 10 False 56.88 Kword pairs / s \n", - " True 157.11 Kword pairs / s \n", - " 100 False 50.65 Kword pairs / s \n", - " True 195.91 Kword pairs / s " + " consumption_speed \n", + "dictionary_size nonzero_limit symmetric positive_definite \n", + "10000 1 False False 22.96 Kword pairs / s \n", + " True 20.30 Kword pairs / s \n", + " True False 53.90 Kword pairs / s \n", + " True 41.59 Kword pairs / s \n", + " 10 False False 37.21 Kword pairs / s \n", + " True 162.49 Kword pairs / s \n", + " True False 199.53 Kword pairs / s \n", + " True 72.44 Kword pairs / s \n", + " 100 False False 39.58 Kword pairs / s \n", + " True 883.02 Kword pairs / s \n", + " True False 278.13 Kword pairs / s \n", + " True 84.61 Kword pairs / s \n", + "2010000 1 False False 21.90 Kword pairs / s \n", + " True 19.91 Kword pairs / s \n", + " True False 51.48 Kword pairs / s \n", + " True 40.82 Kword pairs / s \n", + " 10 False False 34.93 Kword pairs / s \n", + " True 163.52 Kword pairs / s \n", + " True False 201.88 Kword pairs / s \n", + " True 72.92 Kword pairs / s \n", + " 100 False False 32.88 Kword pairs / s \n", + " True 928.75 Kword pairs / s \n", + " True False 305.30 Kword pairs / s \n", + " True 84.44 Kword pairs / s " ] }, "execution_count": 8, @@ -454,6 +593,7 @@ " \n", " \n", " \n", + " \n", " duration\n", " matrix_nonzero\n", " consumption_speed\n", @@ -462,6 +602,7 @@ " dictionary_size\n", " nonzero_limit\n", " symmetric\n", + " positive_definite\n", " \n", " \n", " \n", @@ -469,119 +610,254 @@ " \n", " \n", " \n", - " 10000\n", - " 1\n", + " 10000\n", + " 1\n", + " False\n", " False\n", - " 00:00:00.001519\n", + " 00:00:00.005334\n", " 0\n", - " 0.19 Kword pairs / s\n", + " 0.28 Kword pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.002255\n", + " 00:00:00.004072\n", " 0\n", - " 0.82 Kword pairs / s\n", + " 0.17 Kword pairs / s\n", " \n", " \n", - " 10\n", + " True\n", " False\n", - " 00:00:00.013232\n", + " 00:00:00.003124\n", " 0\n", - " 0.53 Kword pairs / s\n", + " 0.90 Kword pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.009424\n", + " 00:00:00.001797\n", " 0\n", - " 2.27 Kword pairs / s\n", + " 0.31 Kword pairs / s\n", " \n", " \n", - " 100\n", + " 10\n", + " False\n", " False\n", - " 00:00:00.101245\n", + " 00:00:00.011986\n", " 0\n", - " 0.47 Kword pairs / s\n", + " 0.17 Kword pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.021103\n", + " 00:00:00.005972\n", " 0\n", - " 0.77 Kword pairs / s\n", + " 1.59 Kword pairs / s\n", " \n", " \n", - " 2010000\n", - " 1\n", + " True\n", + " False\n", + " 00:00:00.002869\n", + " 0\n", + " 1.15 Kword pairs / s\n", + " \n", + " \n", + " True\n", + " 00:00:00.011411\n", + " 0\n", + " 0.60 Kword pairs / s\n", + " \n", + " \n", + " 100\n", + " False\n", + " False\n", + " 00:00:00.111118\n", + " 0\n", + " 0.17 Kword pairs / s\n", + " \n", + " \n", + " True\n", + " 00:00:00.007611\n", + " 0\n", + " 5.94 Kword pairs / s\n", + " \n", + " \n", + " True\n", " False\n", - " 00:00:00.205360\n", + " 00:00:00.030875\n", " 0\n", - " 0.10 Kword pairs / s\n", + " 2.38 Kword pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.091344\n", + " 00:00:00.050198\n", + " 0\n", + " 0.36 Kword pairs / s\n", + " 
\n", + " \n", + " 2010000\n", + " 1\n", + " False\n", + " False\n", + " 00:00:00.767305\n", + " 0\n", + " 0.18 Kword pairs / s\n", + " \n", + " \n", + " True\n", + " 00:00:00.172432\n", + " 0\n", + " 0.03 Kword pairs / s\n", + " \n", + " \n", + " True\n", + " False\n", + " 00:00:00.346239\n", + " 0\n", + " 0.46 Kword pairs / s\n", + " \n", + " \n", + " True\n", + " 00:00:00.177075\n", + " 0\n", + " 0.15 Kword pairs / s\n", + " \n", + " \n", + " 10\n", + " False\n", + " False\n", + " 00:00:05.156655\n", + " 0\n", + " 0.31 Kword pairs / s\n", + " \n", + " \n", + " True\n", + " 00:00:00.631676\n", + " 0\n", + " 0.83 Kword pairs / s\n", + " \n", + " \n", + " True\n", + " False\n", + " 00:00:01.216067\n", + " 0\n", + " 2.41 Kword pairs / s\n", + " \n", + " \n", + " True\n", + " 00:00:00.547773\n", " 0\n", " 0.14 Kword pairs / s\n", " \n", " \n", - " 10\n", + " 100\n", + " False\n", " False\n", - " 00:00:01.252888\n", + " 00:04:10.371035\n", " 0\n", - " 0.20 Kword pairs / s\n", + " 1.24 Kword pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.302513\n", + " 00:00:00.634416\n", " 0\n", - " 0.37 Kword pairs / s\n", + " 2.73 Kword pairs / s\n", " \n", " \n", - " 100\n", + " True\n", " False\n", - " 00:00:54.806356\n", + " 00:00:06.586767\n", " 0\n", - " 0.68 Kword pairs / s\n", + " 3.05 Kword pairs / s\n", " \n", " \n", " True\n", - " 00:00:09.599669\n", + " 00:00:09.030932\n", " 0\n", - " 1.82 Kword pairs / s\n", + " 0.32 Kword pairs / s\n", " \n", " \n", "\n", "" ], "text/plain": [ - " duration matrix_nonzero \\\n", - "dictionary_size nonzero_limit symmetric \n", - "10000 1 False 00:00:00.001519 0 \n", - " True 00:00:00.002255 0 \n", - " 10 False 00:00:00.013232 0 \n", - " True 00:00:00.009424 0 \n", - " 100 False 00:00:00.101245 0 \n", - " True 00:00:00.021103 0 \n", - "2010000 1 False 00:00:00.205360 0 \n", - " True 00:00:00.091344 0 \n", - " 10 False 00:00:01.252888 0 \n", - " True 00:00:00.302513 0 \n", - " 100 False 00:00:54.806356 0 \n", - " True 00:00:09.599669 0 \n", + " duration \\\n", + "dictionary_size nonzero_limit symmetric positive_definite \n", + "10000 1 False False 00:00:00.005334 \n", + " True 00:00:00.004072 \n", + " True False 00:00:00.003124 \n", + " True 00:00:00.001797 \n", + " 10 False False 00:00:00.011986 \n", + " True 00:00:00.005972 \n", + " True False 00:00:00.002869 \n", + " True 00:00:00.011411 \n", + " 100 False False 00:00:00.111118 \n", + " True 00:00:00.007611 \n", + " True False 00:00:00.030875 \n", + " True 00:00:00.050198 \n", + "2010000 1 False False 00:00:00.767305 \n", + " True 00:00:00.172432 \n", + " True False 00:00:00.346239 \n", + " True 00:00:00.177075 \n", + " 10 False False 00:00:05.156655 \n", + " True 00:00:00.631676 \n", + " True False 00:00:01.216067 \n", + " True 00:00:00.547773 \n", + " 100 False False 00:04:10.371035 \n", + " True 00:00:00.634416 \n", + " True False 00:00:06.586767 \n", + " True 00:00:09.030932 \n", "\n", - " consumption_speed \n", - "dictionary_size nonzero_limit symmetric \n", - "10000 1 False 0.19 Kword pairs / s \n", - " True 0.82 Kword pairs / s \n", - " 10 False 0.53 Kword pairs / s \n", - " True 2.27 Kword pairs / s \n", - " 100 False 0.47 Kword pairs / s \n", - " True 0.77 Kword pairs / s \n", - "2010000 1 False 0.10 Kword pairs / s \n", - " True 0.14 Kword pairs / s \n", - " 10 False 0.20 Kword pairs / s \n", - " True 0.37 Kword pairs / s \n", - " 100 False 0.68 Kword pairs / s \n", - " True 1.82 Kword pairs / s " + " matrix_nonzero \\\n", + "dictionary_size nonzero_limit symmetric positive_definite \n", + "10000 1 
False False 0 \n", + " True 0 \n", + " True False 0 \n", + " True 0 \n", + " 10 False False 0 \n", + " True 0 \n", + " True False 0 \n", + " True 0 \n", + " 100 False False 0 \n", + " True 0 \n", + " True False 0 \n", + " True 0 \n", + "2010000 1 False False 0 \n", + " True 0 \n", + " True False 0 \n", + " True 0 \n", + " 10 False False 0 \n", + " True 0 \n", + " True False 0 \n", + " True 0 \n", + " 100 False False 0 \n", + " True 0 \n", + " True False 0 \n", + " True 0 \n", + "\n", + " consumption_speed \n", + "dictionary_size nonzero_limit symmetric positive_definite \n", + "10000 1 False False 0.28 Kword pairs / s \n", + " True 0.17 Kword pairs / s \n", + " True False 0.90 Kword pairs / s \n", + " True 0.31 Kword pairs / s \n", + " 10 False False 0.17 Kword pairs / s \n", + " True 1.59 Kword pairs / s \n", + " True False 1.15 Kword pairs / s \n", + " True 0.60 Kword pairs / s \n", + " 100 False False 0.17 Kword pairs / s \n", + " True 5.94 Kword pairs / s \n", + " True False 2.38 Kword pairs / s \n", + " True 0.36 Kword pairs / s \n", + "2010000 1 False False 0.18 Kword pairs / s \n", + " True 0.03 Kword pairs / s \n", + " True False 0.46 Kword pairs / s \n", + " True 0.15 Kword pairs / s \n", + " 10 False False 0.31 Kword pairs / s \n", + " True 0.83 Kword pairs / s \n", + " True False 2.41 Kword pairs / s \n", + " True 0.14 Kword pairs / s \n", + " 100 False False 1.24 Kword pairs / s \n", + " True 2.73 Kword pairs / s \n", + " True False 3.05 Kword pairs / s \n", + " True 0.32 Kword pairs / s " ] }, "execution_count": 9, @@ -714,44 +990,44 @@ " \n", " 1000\n", " 1\n", - " 00:00:00.003828\n", - " 261.66 Kword pairs / s\n", + " 00:00:00.002973\n", + " 336.41 Kword pairs / s\n", " \n", " \n", " 10\n", - " 00:00:00.009975\n", - " 1002.80 Kword pairs / s\n", + " 00:00:00.005372\n", + " 1861.64 Kword pairs / s\n", " \n", " \n", " 100\n", - " 00:00:00.073020\n", - " 1372.84 Kword pairs / s\n", + " 00:00:00.026752\n", + " 3738.79 Kword pairs / s\n", " \n", " \n", " 1000\n", - " 00:00:00.727086\n", - " 1375.54 Kword pairs / s\n", + " 00:00:00.290265\n", + " 3449.16 Kword pairs / s\n", " \n", " \n", " 2010000\n", " 1\n", - " 00:00:08.315807\n", - " 241.71 Kword pairs / s\n", + " 00:00:06.318446\n", + " 318.12 Kword pairs / s\n", " \n", " \n", " 10\n", - " 00:00:21.027485\n", - " 955.90 Kword pairs / s\n", + " 00:00:10.783611\n", + " 1863.96 Kword pairs / s\n", " \n", " \n", " 100\n", - " 00:02:24.223933\n", - " 1393.70 Kword pairs / s\n", + " 00:00:53.108644\n", + " 3785.04 Kword pairs / s\n", " \n", " \n", " 1000\n", - " 00:23:57.702287\n", - " 1398.09 Kword pairs / s\n", + " 00:09:45.103741\n", + " 3437.36 Kword pairs / s\n", " \n", " \n", "\n", @@ -760,14 +1036,14 @@ "text/plain": [ " production_duration production_speed\n", "dictionary_size nonzero_limit \n", - "1000 1 00:00:00.003828 261.66 Kword pairs / s\n", - " 10 00:00:00.009975 1002.80 Kword pairs / s\n", - " 100 00:00:00.073020 1372.84 Kword pairs / s\n", - " 1000 00:00:00.727086 1375.54 Kword pairs / s\n", - "2010000 1 00:00:08.315807 241.71 Kword pairs / s\n", - " 10 00:00:21.027485 955.90 Kword pairs / s\n", - " 100 00:02:24.223933 1393.70 Kword pairs / s\n", - " 1000 00:23:57.702287 1398.09 Kword pairs / s" + "1000 1 00:00:00.002973 336.41 Kword pairs / s\n", + " 10 00:00:00.005372 1861.64 Kword pairs / s\n", + " 100 00:00:00.026752 3738.79 Kword pairs / s\n", + " 1000 00:00:00.290265 3449.16 Kword pairs / s\n", + "2010000 1 00:00:06.318446 318.12 Kword pairs / s\n", + " 10 00:00:10.783611 1863.96 Kword pairs / s\n", + " 100 
00:00:53.108644 3785.04 Kword pairs / s\n", + " 1000 00:09:45.103741 3437.36 Kword pairs / s" ] }, "execution_count": 13, @@ -822,60 +1098,60 @@ " \n", " 1000\n", " 1\n", - " 00:00:00.000163\n", - " 10.67 Kword pairs / s\n", + " 00:00:00.000017\n", + " 1.93 Kword pairs / s\n", " \n", " \n", " 10\n", - " 00:00:00.000174\n", - " 17.08 Kword pairs / s\n", + " 00:00:00.000062\n", + " 21.50 Kword pairs / s\n", " \n", " \n", " 100\n", - " 00:00:00.004030\n", - " 67.59 Kword pairs / s\n", + " 00:00:00.000408\n", + " 56.66 Kword pairs / s\n", " \n", " \n", " 1000\n", - " 00:00:00.009082\n", - " 16.98 Kword pairs / s\n", + " 00:00:00.010500\n", + " 123.82 Kword pairs / s\n", " \n", " \n", " 2010000\n", " 1\n", - " 00:00:00.023885\n", - " 0.70 Kword pairs / s\n", + " 00:00:00.023495\n", + " 1.18 Kword pairs / s\n", " \n", " \n", " 10\n", - " 00:00:00.054096\n", - " 2.46 Kword pairs / s\n", + " 00:00:00.035587\n", + " 6.16 Kword pairs / s\n", " \n", " \n", " 100\n", - " 00:00:00.763313\n", - " 7.42 Kword pairs / s\n", + " 00:00:00.535765\n", + " 37.76 Kword pairs / s\n", " \n", " \n", " 1000\n", - " 00:00:06.162980\n", - " 5.99 Kword pairs / s\n", + " 00:00:15.037816\n", + " 89.56 Kword pairs / s\n", " \n", " \n", "\n", "" ], "text/plain": [ - " production_duration production_speed\n", - "dictionary_size nonzero_limit \n", - "1000 1 00:00:00.000163 10.67 Kword pairs / s\n", - " 10 00:00:00.000174 17.08 Kword pairs / s\n", - " 100 00:00:00.004030 67.59 Kword pairs / s\n", - " 1000 00:00:00.009082 16.98 Kword pairs / s\n", - "2010000 1 00:00:00.023885 0.70 Kword pairs / s\n", - " 10 00:00:00.054096 2.46 Kword pairs / s\n", - " 100 00:00:00.763313 7.42 Kword pairs / s\n", - " 1000 00:00:06.162980 5.99 Kword pairs / s" + " production_duration production_speed\n", + "dictionary_size nonzero_limit \n", + "1000 1 00:00:00.000017 1.93 Kword pairs / s\n", + " 10 00:00:00.000062 21.50 Kword pairs / s\n", + " 100 00:00:00.000408 56.66 Kword pairs / s\n", + " 1000 00:00:00.010500 123.82 Kword pairs / s\n", + "2010000 1 00:00:00.023495 1.18 Kword pairs / s\n", + " 10 00:00:00.035587 6.16 Kword pairs / s\n", + " 100 00:00:00.535765 37.76 Kword pairs / s\n", + " 1000 00:00:15.037816 89.56 Kword pairs / s" ] }, "execution_count": 14, @@ -948,7 +1224,7 @@ "source": [ "The following tables show how long it takes to retrieve the most similar terms for ten randomly sampled terms from a dictionary (the **production_duration** column), the mean term similarity production speed (the **production_speed** column) and the mean term similarity processing speed (the **processing_speed** column) as we vary the dictionary size (the **dictionary_size** column), and the maximum number of most similar terms that will be retrieved (the **nonzero_limit** column). Ten independendent measurements were taken. The top table shows the mean values and the bottom table shows the standard deviations.\n", "\n", - "The **production_speed** is proportional to **nonzero_limit / dictionary_size**." + "The **production_speed** is proportional to **nonzero_limit / dictionary_size**. The **processing_speed** is constant." 
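Aside: a minimal sketch of how the **production_speed** and **processing_speed** figures reported above can be measured. It assumes gensim's `UniformTermSimilarityIndex`, a toy 1000-term dictionary, and a sample of ten query terms, mirroring the surrounding description; the notebook's actual benchmark cell may differ.

```python
# Hedged sketch of the most_similar retrieval benchmark described above.
# Assumptions: UniformTermSimilarityIndex as the index, ten sampled query
# terms, and a synthetic 1000-term dictionary; none of these are taken
# verbatim from the notebook cell that produced the tables.
from random import sample, seed
from time import time

from gensim.corpora import Dictionary
from gensim.similarities.termsim import UniformTermSimilarityIndex

seed(12345)  # stands in for the notebook's RANDOM_SEED
dictionary = Dictionary([["term%d" % i for i in range(1000)]])
index = UniformTermSimilarityIndex(dictionary)
query_terms = sample(list(dictionary.values()), 10)  # ten randomly sampled terms
nonzero_limit = 100  # maximum number of most similar terms to retrieve

start_time = time()
for term in query_terms:
    # Consume the (term, similarity) pairs that the index produces.
    list(index.most_similar(term, topn=nonzero_limit))
duration = time() - start_time

# production_speed: retrieved word pairs per second; proportional to
# nonzero_limit / dictionary_size, since the index inspects the whole
# dictionary for every query.
production_speed = len(query_terms) * nonzero_limit / duration
# processing_speed: dictionary terms scanned per second; roughly constant
# across nonzero_limit values.
processing_speed = len(query_terms) * len(dictionary) / duration
```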
] }, { @@ -1015,59 +1291,59 @@ " \n", " 1000\n", " 1\n", - " 00:00:00.065093\n", - " 153.95 word pairs / s\n", - " 153.95 Kword pairs / s\n", + " 00:00:00.055994\n", + " 178.61 word pairs / s\n", + " 178.61 Kword pairs / s\n", " \n", " \n", " 10\n", - " 00:00:00.064572\n", - " 1551.83 word pairs / s\n", - " 155.18 Kword pairs / s\n", + " 00:00:00.056097\n", + " 1782.70 word pairs / s\n", + " 178.27 Kword pairs / s\n", " \n", " \n", " 100\n", - " 00:00:00.067055\n", - " 14932.14 word pairs / s\n", - " 149.32 Kword pairs / s\n", + " 00:00:00.056212\n", + " 17791.65 word pairs / s\n", + " 177.92 Kword pairs / s\n", " \n", " \n", " 1000000\n", " 1\n", - " 00:00:37.913246\n", - " 0.26 word pairs / s\n", - " 263.96 Kword pairs / s\n", + " 00:01:20.618070\n", + " 0.12 word pairs / s\n", + " 124.05 Kword pairs / s\n", " \n", " \n", " 10\n", - " 00:00:37.672419\n", - " 2.66 word pairs / s\n", - " 265.62 Kword pairs / s\n", + " 00:01:20.048238\n", + " 1.25 word pairs / s\n", + " 124.92 Kword pairs / s\n", " \n", " \n", " 100\n", - " 00:00:37.523020\n", - " 26.66 word pairs / s\n", - " 266.62 Kword pairs / s\n", + " 00:01:20.064999\n", + " 12.49 word pairs / s\n", + " 124.90 Kword pairs / s\n", " \n", " \n", " 2010000\n", " 1\n", - " 00:01:20.311723\n", - " 0.12 word pairs / s\n", - " 250.44 Kword pairs / s\n", + " 00:02:44.069399\n", + " 0.06 word pairs / s\n", + " 122.51 Kword pairs / s\n", " \n", " \n", " 10\n", - " 00:01:20.223581\n", - " 1.25 word pairs / s\n", - " 250.73 Kword pairs / s\n", + " 00:02:43.914601\n", + " 0.61 word pairs / s\n", + " 122.63 Kword pairs / s\n", " \n", " \n", " 100\n", - " 00:01:20.064439\n", - " 12.49 word pairs / s\n", - " 251.13 Kword pairs / s\n", + " 00:02:43.892408\n", + " 6.10 word pairs / s\n", + " 122.64 Kword pairs / s\n", " \n", " \n", "\n", @@ -1076,27 +1352,27 @@ "text/plain": [ " production_duration production_speed \\\n", "dictionary_size nonzero_limit \n", - "1000 1 00:00:00.065093 153.95 word pairs / s \n", - " 10 00:00:00.064572 1551.83 word pairs / s \n", - " 100 00:00:00.067055 14932.14 word pairs / s \n", - "1000000 1 00:00:37.913246 0.26 word pairs / s \n", - " 10 00:00:37.672419 2.66 word pairs / s \n", - " 100 00:00:37.523020 26.66 word pairs / s \n", - "2010000 1 00:01:20.311723 0.12 word pairs / s \n", - " 10 00:01:20.223581 1.25 word pairs / s \n", - " 100 00:01:20.064439 12.49 word pairs / s \n", + "1000 1 00:00:00.055994 178.61 word pairs / s \n", + " 10 00:00:00.056097 1782.70 word pairs / s \n", + " 100 00:00:00.056212 17791.65 word pairs / s \n", + "1000000 1 00:01:20.618070 0.12 word pairs / s \n", + " 10 00:01:20.048238 1.25 word pairs / s \n", + " 100 00:01:20.064999 12.49 word pairs / s \n", + "2010000 1 00:02:44.069399 0.06 word pairs / s \n", + " 10 00:02:43.914601 0.61 word pairs / s \n", + " 100 00:02:43.892408 6.10 word pairs / s \n", "\n", " processing_speed \n", "dictionary_size nonzero_limit \n", - "1000 1 153.95 Kword pairs / s \n", - " 10 155.18 Kword pairs / s \n", - " 100 149.32 Kword pairs / s \n", - "1000000 1 263.96 Kword pairs / s \n", - " 10 265.62 Kword pairs / s \n", - " 100 266.62 Kword pairs / s \n", - "2010000 1 250.44 Kword pairs / s \n", - " 10 250.73 Kword pairs / s \n", - " 100 251.13 Kword pairs / s " + "1000 1 178.61 Kword pairs / s \n", + " 10 178.27 Kword pairs / s \n", + " 100 177.92 Kword pairs / s \n", + "1000000 1 124.05 Kword pairs / s \n", + " 10 124.92 Kword pairs / s \n", + " 100 124.90 Kword pairs / s \n", + "2010000 1 122.51 Kword pairs / s \n", + " 10 122.63 Kword pairs / s \n", + " 100 
122.64 Kword pairs / s " ] }, "execution_count": 18, @@ -1155,59 +1431,59 @@ " \n", " 1000\n", " 1\n", - " 00:00:00.003128\n", - " 7.43 word pairs / s\n", - " 7.43 Kword pairs / s\n", + " 00:00:00.000673\n", + " 2.16 word pairs / s\n", + " 2.16 Kword pairs / s\n", " \n", " \n", " 10\n", - " 00:00:00.003058\n", - " 74.56 word pairs / s\n", - " 7.46 Kword pairs / s\n", + " 00:00:00.000409\n", + " 13.06 word pairs / s\n", + " 1.31 Kword pairs / s\n", " \n", " \n", " 100\n", - " 00:00:00.002477\n", - " 572.64 word pairs / s\n", - " 5.73 Kword pairs / s\n", + " 00:00:00.000621\n", + " 196.80 word pairs / s\n", + " 1.97 Kword pairs / s\n", " \n", " \n", " 1000000\n", " 1\n", - " 00:00:01.094520\n", - " 0.01 word pairs / s\n", - " 7.67 Kword pairs / s\n", + " 00:00:00.810661\n", + " 0.00 word pairs / s\n", + " 1.23 Kword pairs / s\n", " \n", " \n", " 10\n", - " 00:00:01.034344\n", - " 0.07 word pairs / s\n", - " 7.23 Kword pairs / s\n", + " 00:00:00.110013\n", + " 0.00 word pairs / s\n", + " 0.17 Kword pairs / s\n", " \n", " \n", " 100\n", - " 00:00:00.833463\n", - " 0.58 word pairs / s\n", - " 5.76 Kword pairs / s\n", + " 00:00:00.164959\n", + " 0.03 word pairs / s\n", + " 0.26 Kword pairs / s\n", " \n", " \n", " 2010000\n", " 1\n", - " 00:00:02.193144\n", + " 00:00:01.159273\n", " 0.00 word pairs / s\n", - " 6.78 Kword pairs / s\n", + " 0.85 Kword pairs / s\n", " \n", " \n", " 10\n", - " 00:00:02.284553\n", - " 0.04 word pairs / s\n", - " 7.07 Kword pairs / s\n", + " 00:00:00.429011\n", + " 0.00 word pairs / s\n", + " 0.32 Kword pairs / s\n", " \n", " \n", " 100\n", - " 00:00:01.536381\n", - " 0.24 word pairs / s\n", - " 4.85 Kword pairs / s\n", + " 00:00:00.433687\n", + " 0.02 word pairs / s\n", + " 0.32 Kword pairs / s\n", " \n", " \n", "\n", @@ -1216,27 +1492,27 @@ "text/plain": [ " production_duration production_speed \\\n", "dictionary_size nonzero_limit \n", - "1000 1 00:00:00.003128 7.43 word pairs / s \n", - " 10 00:00:00.003058 74.56 word pairs / s \n", - " 100 00:00:00.002477 572.64 word pairs / s \n", - "1000000 1 00:00:01.094520 0.01 word pairs / s \n", - " 10 00:00:01.034344 0.07 word pairs / s \n", - " 100 00:00:00.833463 0.58 word pairs / s \n", - "2010000 1 00:00:02.193144 0.00 word pairs / s \n", - " 10 00:00:02.284553 0.04 word pairs / s \n", - " 100 00:00:01.536381 0.24 word pairs / s \n", + "1000 1 00:00:00.000673 2.16 word pairs / s \n", + " 10 00:00:00.000409 13.06 word pairs / s \n", + " 100 00:00:00.000621 196.80 word pairs / s \n", + "1000000 1 00:00:00.810661 0.00 word pairs / s \n", + " 10 00:00:00.110013 0.00 word pairs / s \n", + " 100 00:00:00.164959 0.03 word pairs / s \n", + "2010000 1 00:00:01.159273 0.00 word pairs / s \n", + " 10 00:00:00.429011 0.00 word pairs / s \n", + " 100 00:00:00.433687 0.02 word pairs / s \n", "\n", " processing_speed \n", "dictionary_size nonzero_limit \n", - "1000 1 7.43 Kword pairs / s \n", - " 10 7.46 Kword pairs / s \n", - " 100 5.73 Kword pairs / s \n", - "1000000 1 7.67 Kword pairs / s \n", - " 10 7.23 Kword pairs / s \n", - " 100 5.76 Kword pairs / s \n", - "2010000 1 6.78 Kword pairs / s \n", - " 10 7.07 Kword pairs / s \n", - " 100 4.85 Kword pairs / s " + "1000 1 2.16 Kword pairs / s \n", + " 10 1.31 Kword pairs / s \n", + " 100 1.97 Kword pairs / s \n", + "1000000 1 1.23 Kword pairs / s \n", + " 10 0.17 Kword pairs / s \n", + " 100 0.26 Kword pairs / s \n", + "2010000 1 0.85 Kword pairs / s \n", + " 10 0.32 Kword pairs / s \n", + " 100 0.32 Kword pairs / s " ] }, "execution_count": 19, @@ -1307,7 +1583,7 @@ { "data": { 
"application/vnd.jupyter.widget-view+json": { - "model_id": "18a2afa5fdbc417190e2757b6ea6160f", + "model_id": "842bb1a60f814110a8f20eb44a973397", "version_major": 2, "version_minor": 0 }, @@ -1428,91 +1704,91 @@ " 1000000\n", " 1\n", " 0\n", - " 00:00:00.000016\n", - " 00:00:19.814856\n", + " 00:00:00.000007\n", + " 00:00:19.962977\n", " 0.05 Kword pairs / s\n", - " 50467.35 Kword pairs / s\n", + " 50094.22 Kword pairs / s\n", " \n", " \n", " 1\n", - " 00:00:29.243768\n", - " 00:00:00.086151\n", - " 11.61 Kword pairs / s\n", - " 11607994.56 Kword pairs / s\n", + " 00:00:30.268797\n", + " 00:00:00.097011\n", + " 10.32 Kword pairs / s\n", + " 10320061.76 Kword pairs / s\n", " \n", " \n", " 100\n", - " 00:06:18.394023\n", - " 00:00:00.145153\n", - " 6.89 Kword pairs / s\n", - " 6889489.33 Kword pairs / s\n", + " 00:06:23.415982\n", + " 00:00:00.160870\n", + " 6.24 Kword pairs / s\n", + " 6236688.27 Kword pairs / s\n", " \n", " \n", " 100\n", " 0\n", - " 00:00:00.000014\n", - " 00:00:21.404975\n", - " 4.67 Kword pairs / s\n", - " 46718.15 Kword pairs / s\n", + " 00:00:00.000008\n", + " 00:00:22.868372\n", + " 4.37 Kword pairs / s\n", + " 43729.34 Kword pairs / s\n", " \n", " \n", " 1\n", - " 00:00:29.327988\n", - " 00:00:00.148678\n", - " 672.60 Kword pairs / s\n", - " 6725972.71 Kword pairs / s\n", + " 00:00:31.154876\n", + " 00:00:00.156238\n", + " 641.91 Kword pairs / s\n", + " 6419086.99 Kword pairs / s\n", " \n", " \n", " 100\n", - " 00:06:17.906254\n", - " 00:00:01.267254\n", - " 78.91 Kword pairs / s\n", - " 789115.67 Kword pairs / s\n", + " 00:06:23.290572\n", + " 00:00:01.297445\n", + " 77.13 Kword pairs / s\n", + " 771277.71 Kword pairs / s\n", " \n", " \n", " 2010000\n", " 1\n", " 0\n", - " 00:00:00.000013\n", - " 00:01:55.708445\n", + " 00:00:00.000007\n", + " 00:01:55.303216\n", " 0.01 Kword pairs / s\n", - " 17371.28 Kword pairs / s\n", + " 17432.79 Kword pairs / s\n", " \n", " \n", " 1\n", - " 00:01:30.093113\n", - " 00:00:00.169142\n", - " 5.91 Kword pairs / s\n", - " 11883667.16 Kword pairs / s\n", + " 00:01:34.004196\n", + " 00:00:00.190463\n", + " 5.25 Kword pairs / s\n", + " 10561607.14 Kword pairs / s\n", " \n", " \n", " 100\n", - " 00:23:21.211156\n", - " 00:00:00.317731\n", - " 3.15 Kword pairs / s\n", - " 6341482.88 Kword pairs / s\n", + " 00:23:29.796006\n", + " 00:00:00.339500\n", + " 2.96 Kword pairs / s\n", + " 5954865.50 Kword pairs / s\n", " \n", " \n", " 100\n", " 0\n", - " 00:00:00.000012\n", - " 00:02:12.106273\n", + " 00:00:00.000007\n", + " 00:02:11.926861\n", " 0.76 Kword pairs / s\n", - " 15215.26 Kword pairs / s\n", + " 15236.46 Kword pairs / s\n", " \n", " \n", " 1\n", - " 00:01:30.555084\n", - " 00:00:00.271253\n", - " 368.69 Kword pairs / s\n", - " 7410628.89 Kword pairs / s\n", + " 00:01:35.813414\n", + " 00:00:00.301120\n", + " 332.38 Kword pairs / s\n", + " 6680879.02 Kword pairs / s\n", " \n", " \n", " 100\n", - " 00:22:10.017924\n", - " 00:00:02.856382\n", - " 35.01 Kword pairs / s\n", - " 703700.97 Kword pairs / s\n", + " 00:23:05.155399\n", + " 00:00:03.031527\n", + " 33.42 Kword pairs / s\n", + " 671683.05 Kword pairs / s\n", " \n", " \n", "\n", @@ -1521,63 +1797,63 @@ "text/plain": [ " constructor_duration \\\n", "dictionary_size nonzero_limit annoy_n_trees \n", - "1000000 1 0 00:00:00.000016 \n", - " 1 00:00:29.243768 \n", - " 100 00:06:18.394023 \n", - " 100 0 00:00:00.000014 \n", - " 1 00:00:29.327988 \n", - " 100 00:06:17.906254 \n", - "2010000 1 0 00:00:00.000013 \n", - " 1 00:01:30.093113 \n", - " 100 00:23:21.211156 \n", - " 100 0 
00:00:00.000012 \n", - " 1 00:01:30.555084 \n", - " 100 00:22:10.017924 \n", + "1000000 1 0 00:00:00.000007 \n", + " 1 00:00:30.268797 \n", + " 100 00:06:23.415982 \n", + " 100 0 00:00:00.000008 \n", + " 1 00:00:31.154876 \n", + " 100 00:06:23.290572 \n", + "2010000 1 0 00:00:00.000007 \n", + " 1 00:01:34.004196 \n", + " 100 00:23:29.796006 \n", + " 100 0 00:00:00.000007 \n", + " 1 00:01:35.813414 \n", + " 100 00:23:05.155399 \n", "\n", " production_duration \\\n", "dictionary_size nonzero_limit annoy_n_trees \n", - "1000000 1 0 00:00:19.814856 \n", - " 1 00:00:00.086151 \n", - " 100 00:00:00.145153 \n", - " 100 0 00:00:21.404975 \n", - " 1 00:00:00.148678 \n", - " 100 00:00:01.267254 \n", - "2010000 1 0 00:01:55.708445 \n", - " 1 00:00:00.169142 \n", - " 100 00:00:00.317731 \n", - " 100 0 00:02:12.106273 \n", - " 1 00:00:00.271253 \n", - " 100 00:00:02.856382 \n", + "1000000 1 0 00:00:19.962977 \n", + " 1 00:00:00.097011 \n", + " 100 00:00:00.160870 \n", + " 100 0 00:00:22.868372 \n", + " 1 00:00:00.156238 \n", + " 100 00:00:01.297445 \n", + "2010000 1 0 00:01:55.303216 \n", + " 1 00:00:00.190463 \n", + " 100 00:00:00.339500 \n", + " 100 0 00:02:11.926861 \n", + " 1 00:00:00.301120 \n", + " 100 00:00:03.031527 \n", "\n", " production_speed \\\n", "dictionary_size nonzero_limit annoy_n_trees \n", "1000000 1 0 0.05 Kword pairs / s \n", - " 1 11.61 Kword pairs / s \n", - " 100 6.89 Kword pairs / s \n", - " 100 0 4.67 Kword pairs / s \n", - " 1 672.60 Kword pairs / s \n", - " 100 78.91 Kword pairs / s \n", + " 1 10.32 Kword pairs / s \n", + " 100 6.24 Kword pairs / s \n", + " 100 0 4.37 Kword pairs / s \n", + " 1 641.91 Kword pairs / s \n", + " 100 77.13 Kword pairs / s \n", "2010000 1 0 0.01 Kword pairs / s \n", - " 1 5.91 Kword pairs / s \n", - " 100 3.15 Kword pairs / s \n", + " 1 5.25 Kword pairs / s \n", + " 100 2.96 Kword pairs / s \n", " 100 0 0.76 Kword pairs / s \n", - " 1 368.69 Kword pairs / s \n", - " 100 35.01 Kword pairs / s \n", + " 1 332.38 Kword pairs / s \n", + " 100 33.42 Kword pairs / s \n", "\n", " processing_speed \n", "dictionary_size nonzero_limit annoy_n_trees \n", - "1000000 1 0 50467.35 Kword pairs / s \n", - " 1 11607994.56 Kword pairs / s \n", - " 100 6889489.33 Kword pairs / s \n", - " 100 0 46718.15 Kword pairs / s \n", - " 1 6725972.71 Kword pairs / s \n", - " 100 789115.67 Kword pairs / s \n", - "2010000 1 0 17371.28 Kword pairs / s \n", - " 1 11883667.16 Kword pairs / s \n", - " 100 6341482.88 Kword pairs / s \n", - " 100 0 15215.26 Kword pairs / s \n", - " 1 7410628.89 Kword pairs / s \n", - " 100 703700.97 Kword pairs / s " + "1000000 1 0 50094.22 Kword pairs / s \n", + " 1 10320061.76 Kword pairs / s \n", + " 100 6236688.27 Kword pairs / s \n", + " 100 0 43729.34 Kword pairs / s \n", + " 1 6419086.99 Kword pairs / s \n", + " 100 771277.71 Kword pairs / s \n", + "2010000 1 0 17432.79 Kword pairs / s \n", + " 1 10561607.14 Kword pairs / s \n", + " 100 5954865.50 Kword pairs / s \n", + " 100 0 15236.46 Kword pairs / s \n", + " 1 6680879.02 Kword pairs / s \n", + " 100 671683.05 Kword pairs / s " ] }, "execution_count": 23, @@ -1639,91 +1915,91 @@ " 1000000\n", " 1\n", " 0\n", - " 00:00:00.000007\n", - " 00:00:00.038433\n", + " 00:00:00.000002\n", + " 00:00:00.115644\n", " 0.00 Kword pairs / s\n", - " 97.76 Kword pairs / s\n", + " 286.27 Kword pairs / s\n", " \n", " \n", " 1\n", - " 00:00:00.037389\n", - " 00:00:00.000601\n", - " 0.08 Kword pairs / s\n", - " 79750.61 Kword pairs / s\n", + " 00:00:01.854097\n", + " 00:00:00.003517\n", + " 0.37 Kword pairs / 
s\n", + " 367959.55 Kword pairs / s\n", " \n", " \n", " 100\n", - " 00:00:00.778346\n", - " 00:00:00.000842\n", - " 0.04 Kword pairs / s\n", - " 39420.33 Kword pairs / s\n", + " 00:00:04.702035\n", + " 00:00:00.010444\n", + " 0.35 Kword pairs / s\n", + " 350506.05 Kword pairs / s\n", " \n", " \n", " 100\n", " 0\n", - " 00:00:00\n", - " 00:00:00.019706\n", - " 0.00 Kword pairs / s\n", - " 43.04 Kword pairs / s\n", + " 00:00:00.000002\n", + " 00:00:00.104872\n", + " 0.02 Kword pairs / s\n", + " 198.86 Kword pairs / s\n", " \n", " \n", " 1\n", - " 00:00:00.230572\n", - " 00:00:00.000249\n", - " 1.13 Kword pairs / s\n", - " 11255.12 Kword pairs / s\n", + " 00:00:01.163678\n", + " 00:00:00.008939\n", + " 36.14 Kword pairs / s\n", + " 361441.71 Kword pairs / s\n", " \n", " \n", " 100\n", - " 00:00:00.365938\n", - " 00:00:00.004160\n", - " 0.26 Kword pairs / s\n", - " 2582.72 Kword pairs / s\n", + " 00:00:06.818568\n", + " 00:00:00.036979\n", + " 2.07 Kword pairs / s\n", + " 20741.69 Kword pairs / s\n", " \n", " \n", " 2010000\n", " 1\n", " 0\n", - " 00:00:00\n", - " 00:00:00.165553\n", + " 00:00:00.000001\n", + " 00:00:00.653177\n", " 0.00 Kword pairs / s\n", - " 24.79 Kword pairs / s\n", + " 97.50 Kword pairs / s\n", " \n", " \n", " 1\n", - " 00:00:00.054403\n", - " 00:00:00.000622\n", - " 0.02 Kword pairs / s\n", - " 43763.82 Kword pairs / s\n", + " 00:00:04.677209\n", + " 00:00:00.005679\n", + " 0.16 Kword pairs / s\n", + " 311832.91 Kword pairs / s\n", " \n", " \n", " 100\n", - " 00:02:36.227334\n", - " 00:00:00.017605\n", - " 0.15 Kword pairs / s\n", - " 308488.25 Kword pairs / s\n", + " 00:01:38.562684\n", + " 00:00:00.029887\n", + " 0.22 Kword pairs / s\n", + " 434681.25 Kword pairs / s\n", " \n", " \n", " 100\n", " 0\n", " 00:00:00.000001\n", - " 00:00:00.546129\n", - " 0.00 Kword pairs / s\n", - " 62.80 Kword pairs / s\n", + " 00:00:00.979613\n", + " 0.01 Kword pairs / s\n", + " 111.85 Kword pairs / s\n", " \n", " \n", " 1\n", - " 00:00:00.961168\n", - " 00:00:00.002506\n", - " 3.35 Kword pairs / s\n", - " 67336.24 Kword pairs / s\n", + " 00:00:03.207474\n", + " 00:00:00.009479\n", + " 10.18 Kword pairs / s\n", + " 204614.80 Kword pairs / s\n", " \n", " \n", " 100\n", - " 00:00:05.000325\n", - " 00:00:00.013226\n", - " 0.16 Kword pairs / s\n", - " 3258.56 Kword pairs / s\n", + " 00:00:55.119595\n", + " 00:00:00.419531\n", + " 3.46 Kword pairs / s\n", + " 69543.35 Kword pairs / s\n", " \n", " \n", "\n", @@ -1732,63 +2008,63 @@ "text/plain": [ " constructor_duration \\\n", "dictionary_size nonzero_limit annoy_n_trees \n", - "1000000 1 0 00:00:00.000007 \n", - " 1 00:00:00.037389 \n", - " 100 00:00:00.778346 \n", - " 100 0 00:00:00 \n", - " 1 00:00:00.230572 \n", - " 100 00:00:00.365938 \n", - "2010000 1 0 00:00:00 \n", - " 1 00:00:00.054403 \n", - " 100 00:02:36.227334 \n", + "1000000 1 0 00:00:00.000002 \n", + " 1 00:00:01.854097 \n", + " 100 00:00:04.702035 \n", + " 100 0 00:00:00.000002 \n", + " 1 00:00:01.163678 \n", + " 100 00:00:06.818568 \n", + "2010000 1 0 00:00:00.000001 \n", + " 1 00:00:04.677209 \n", + " 100 00:01:38.562684 \n", " 100 0 00:00:00.000001 \n", - " 1 00:00:00.961168 \n", - " 100 00:00:05.000325 \n", + " 1 00:00:03.207474 \n", + " 100 00:00:55.119595 \n", "\n", " production_duration \\\n", "dictionary_size nonzero_limit annoy_n_trees \n", - "1000000 1 0 00:00:00.038433 \n", - " 1 00:00:00.000601 \n", - " 100 00:00:00.000842 \n", - " 100 0 00:00:00.019706 \n", - " 1 00:00:00.000249 \n", - " 100 00:00:00.004160 \n", - "2010000 1 0 00:00:00.165553 \n", - " 1 
00:00:00.000622 \n", - " 100 00:00:00.017605 \n", - " 100 0 00:00:00.546129 \n", - " 1 00:00:00.002506 \n", - " 100 00:00:00.013226 \n", + "1000000 1 0 00:00:00.115644 \n", + " 1 00:00:00.003517 \n", + " 100 00:00:00.010444 \n", + " 100 0 00:00:00.104872 \n", + " 1 00:00:00.008939 \n", + " 100 00:00:00.036979 \n", + "2010000 1 0 00:00:00.653177 \n", + " 1 00:00:00.005679 \n", + " 100 00:00:00.029887 \n", + " 100 0 00:00:00.979613 \n", + " 1 00:00:00.009479 \n", + " 100 00:00:00.419531 \n", "\n", - " production_speed \\\n", - "dictionary_size nonzero_limit annoy_n_trees \n", - "1000000 1 0 0.00 Kword pairs / s \n", - " 1 0.08 Kword pairs / s \n", - " 100 0.04 Kword pairs / s \n", - " 100 0 0.00 Kword pairs / s \n", - " 1 1.13 Kword pairs / s \n", - " 100 0.26 Kword pairs / s \n", - "2010000 1 0 0.00 Kword pairs / s \n", - " 1 0.02 Kword pairs / s \n", - " 100 0.15 Kword pairs / s \n", - " 100 0 0.00 Kword pairs / s \n", - " 1 3.35 Kword pairs / s \n", - " 100 0.16 Kword pairs / s \n", + " production_speed \\\n", + "dictionary_size nonzero_limit annoy_n_trees \n", + "1000000 1 0 0.00 Kword pairs / s \n", + " 1 0.37 Kword pairs / s \n", + " 100 0.35 Kword pairs / s \n", + " 100 0 0.02 Kword pairs / s \n", + " 1 36.14 Kword pairs / s \n", + " 100 2.07 Kword pairs / s \n", + "2010000 1 0 0.00 Kword pairs / s \n", + " 1 0.16 Kword pairs / s \n", + " 100 0.22 Kword pairs / s \n", + " 100 0 0.01 Kword pairs / s \n", + " 1 10.18 Kword pairs / s \n", + " 100 3.46 Kword pairs / s \n", "\n", " processing_speed \n", "dictionary_size nonzero_limit annoy_n_trees \n", - "1000000 1 0 97.76 Kword pairs / s \n", - " 1 79750.61 Kword pairs / s \n", - " 100 39420.33 Kword pairs / s \n", - " 100 0 43.04 Kword pairs / s \n", - " 1 11255.12 Kword pairs / s \n", - " 100 2582.72 Kword pairs / s \n", - "2010000 1 0 24.79 Kword pairs / s \n", - " 1 43763.82 Kword pairs / s \n", - " 100 308488.25 Kword pairs / s \n", - " 100 0 62.80 Kword pairs / s \n", - " 1 67336.24 Kword pairs / s \n", - " 100 3258.56 Kword pairs / s " + "1000000 1 0 286.27 Kword pairs / s \n", + " 1 367959.55 Kword pairs / s \n", + " 100 350506.05 Kword pairs / s \n", + " 100 0 198.86 Kword pairs / s \n", + " 1 361441.71 Kword pairs / s \n", + " 100 20741.69 Kword pairs / s \n", + "2010000 1 0 97.50 Kword pairs / s \n", + " 1 311832.91 Kword pairs / s \n", + " 100 434681.25 Kword pairs / s \n", + " 100 0 111.85 Kword pairs / s \n", + " 1 204614.80 Kword pairs / s \n", + " 100 69543.35 Kword pairs / s " ] }, "execution_count": 24, @@ -1879,7 +2155,8 @@ " \"nonzero_limit\": nonzero_limit,\n", " \"normalized\": normalized,\n", " \"corpus_size\": corpus_size,\n", - " \"corpus_nonzero\": len(corpus),\n", + " \"corpus_actual_size\": len(corpus),\n", + " \"corpus_nonzero\": sum(len(vec) for vec in corpus),\n", " \"mean_document_length\": np.mean([len(doc) for doc in corpus]),\n", " \"repetition\": repetition,\n", " \"duration\": duration, }" @@ -1893,12 +2170,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "01726e5eb41846ccaa94d3a0357eb4b1", + "model_id": "110675d5552847819754f0dc5b1c19e1", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(IntProgress(value=0, max=4), HTML(value='')))" + "HBox(children=(IntProgress(value=0, max=2), HTML(value='')))" ] }, "metadata": {}, @@ -1914,7 +2191,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e071fb201eae4777bf634c8b9d7f5002", + "model_id": "744e400d597440f79b5923dafb1974fc", "version_major": 2, "version_minor": 0 }, @@ -1935,12 
+2212,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "93388b4a2877423392931e656f4f2a42", + "model_id": "0f84efc0c79a4628a9543736fc5f0c9a", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(IntProgress(value=0, max=5), HTML(value='')))" + "HBox(children=(IntProgress(value=0, max=2), HTML(value='')))" ] }, "metadata": {}, @@ -1956,12 +2233,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4f997785875b47ae820bb6538c561cfd", + "model_id": "8a185a8e530e4481b90056222f5f0a1c", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(IntProgress(value=0, max=15), HTML(value='')))" + "HBox(children=(IntProgress(value=0, max=6), HTML(value='')))" ] }, "metadata": {}, @@ -1971,7 +2248,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/mnt/storage/home/novotny/gensim/gensim/matutils.py:738: FutureWarning: Conversion of the second argument of issubdtype from `int` to `np.signedinteger` is deprecated. In future, it will be treated as `np.int64 == np.dtype(int).type`.\n", + "/mnt/storage/home/novotny/.virtualenvs/gensim/lib/python3.4/site-packages/gensim/matutils.py:738: FutureWarning: Conversion of the second argument of issubdtype from `int` to `np.signedinteger` is deprecated. In future, it will be treated as `np.int64 == np.dtype(int).type`.\n", " if np.issubdtype(vec.dtype, np.int):\n" ] }, @@ -1985,12 +2262,11 @@ ], "source": [ "seed(RANDOM_SEED)\n", - "dictionary_sizes = [10**k for k in range(3, int(ceil(log10(len(full_dictionary)))))]\n", + "dictionary_sizes = [1000, 100000]\n", "dictionaries = []\n", "for size in tqdm(dictionary_sizes, desc=\"dictionaries\"):\n", " dictionary = Dictionary([sample(list(full_dictionary.values()), size)])\n", " dictionaries.append(dictionary)\n", - "dictionaries.append(full_dictionary)\n", "min_dictionary = sorted((len(dictionary), dictionary) for dictionary in dictionaries)[0][1]\n", "\n", "corpus_sizes = [100, 1000]\n", @@ -2044,7 +2320,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The following tables show how long it takes to compute the **inner_product** method between all document vectors in a corpus (the **duration** column), how many nonzero document vectors there are in a corpus (the **corpus_nonzero** column), how many nonzero elements there are in a term similarity matrix (the **matrix_nonzero** column) and the mean document similarity production speed (the **speed** column) as we vary the dictionary size (the **dictionary_size** column), the size of the corpus (the **corpus_size** column), the maximum number of nonzero elements in a single column of the matrix (the **nonzero_limit** column), and the matrix symmetry constraint (the **symmetric** column). Ten independendent measurements were taken. 
The top table shows the mean values and the bottom table shows the standard deviations.\n", + "The following tables show how long it takes to compute the **inner_product** method between all document vectors in a corpus (the **duration** column), how many nonzero elements there are in a corpus matrix (the **corpus_nonzero** column), how many nonzero elements there are in a term similarity matrix (the **matrix_nonzero** column) and the mean document similarity production speed (the **speed** column) as we vary the dictionary size (the **dictionary_size** column), the size of the corpus (the **corpus_size** column), the maximum number of nonzero elements in a single column of the matrix (the **nonzero_limit** column), and the matrix symmetry constraint (the **symmetric** column). Ten independendent measurements were taken. The top table shows the mean values and the bottom table shows the standard deviations.\n", "\n", "The **speed** is proportional to the square of the number of unique terms shared by the two document vectors. In our scenario as well as the standard IR scenario, this means **speed** is constant. Computing a normalized inner product (**normalized**${}={}$True) results in a constant speed decrease." ] @@ -2056,7 +2332,8 @@ "outputs": [], "source": [ "df = pd.DataFrame(results)\n", - "df[\"speed\"] = df.corpus_nonzero**2 / df.duration\n", + "df[\"speed\"] = df.corpus_actual_size**2 / df.duration\n", + "del df[\"corpus_actual_size\"]\n", "df = df.groupby([\"dictionary_size\", \"corpus_size\", \"nonzero_limit\", \"normalized\"])\n", "\n", "def display(df):\n", @@ -2116,126 +2393,126 @@ " 100\n", " 1\n", " False\n", - " 00:00:00.000844\n", - " 1.0\n", + " 00:00:00.007383\n", + " 3.0\n", " 1000.0\n", - " 1.28 Kdoc pairs / s\n", + " 1.23 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.000948\n", - " 1.0\n", + " 00:00:00.009028\n", + " 3.0\n", " 1000.0\n", - " 1.12 Kdoc pairs / s\n", + " 1.01 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:00.000890\n", - " 1.0\n", - " 84724.0\n", + " 00:00:00.007657\n", + " 3.0\n", + " 84944.0\n", " 1.19 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.000852\n", - " 1.0\n", - " 84724.0\n", - " 1.24 Kdoc pairs / s\n", + " 00:00:00.008238\n", + " 3.0\n", + " 84944.0\n", + " 1.10 Kdoc pairs / s\n", " \n", " \n", " 1000\n", " 1\n", " False\n", - " 00:00:00.364683\n", - " 24.0\n", + " 00:00:00.414364\n", + " 26.0\n", " 1000.0\n", - " 1.58 Kdoc pairs / s\n", + " 1.39 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.401855\n", - " 24.0\n", + " 00:00:00.473789\n", + " 26.0\n", " 1000.0\n", - " 1.43 Kdoc pairs / s\n", + " 1.22 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:00.370575\n", - " 24.0\n", - " 84724.0\n", - " 1.56 Kdoc pairs / s\n", + " 00:00:00.430833\n", + " 26.0\n", + " 84944.0\n", + " 1.35 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.397365\n", - " 24.0\n", - " 84724.0\n", - " 1.45 Kdoc pairs / s\n", + " 00:00:00.453477\n", + " 26.0\n", + " 84944.0\n", + " 1.27 Kdoc pairs / s\n", " \n", " \n", " 100000\n", " 100\n", " 1\n", " False\n", - " 00:00:06.010311\n", - " 94.0\n", - " 102754.0\n", - " 1.47 Kdoc pairs / s\n", + " 00:00:05.236376\n", + " 423.0\n", + " 101868.0\n", + " 1.29 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:06.431314\n", - " 94.0\n", - " 102754.0\n", - " 1.38 Kdoc pairs / s\n", + " 00:00:05.623463\n", + " 423.0\n", + " 101868.0\n", + " 1.20 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:06.186657\n", - " 94.0\n", - " 
8134614.0\n", - " 1.43 Kdoc pairs / s\n", + " 00:00:05.083829\n", + " 423.0\n", + " 8202884.0\n", + " 1.33 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:06.708975\n", - " 94.0\n", - " 8134614.0\n", - " 1.32 Kdoc pairs / s\n", + " 00:00:05.576003\n", + " 423.0\n", + " 8202884.0\n", + " 1.21 Kdoc pairs / s\n", " \n", " \n", " 1000\n", " 1\n", " False\n", - " 00:10:13.258062\n", - " 931.0\n", - " 102754.0\n", - " 1.42 Kdoc pairs / s\n", + " 00:08:59.285347\n", + " 5162.0\n", + " 101868.0\n", + " 1.26 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:11:20.907826\n", - " 931.0\n", - " 102754.0\n", - " 1.28 Kdoc pairs / s\n", + " 00:09:57.693219\n", + " 5162.0\n", + " 101868.0\n", + " 1.14 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:10:31.669595\n", - " 931.0\n", - " 8134614.0\n", - " 1.37 Kdoc pairs / s\n", + " 00:09:23.213450\n", + " 5162.0\n", + " 8202884.0\n", + " 1.21 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:11:31.505501\n", - " 931.0\n", - " 8134614.0\n", - " 1.25 Kdoc pairs / s\n", + " 00:10:10.612458\n", + " 5162.0\n", + " 8202884.0\n", + " 1.12 Kdoc pairs / s\n", " \n", " \n", "\n", @@ -2244,79 +2521,79 @@ "text/plain": [ " duration \\\n", "dictionary_size corpus_size nonzero_limit normalized \n", - "1000 100 1 False 00:00:00.000844 \n", - " True 00:00:00.000948 \n", - " 100 False 00:00:00.000890 \n", - " True 00:00:00.000852 \n", - " 1000 1 False 00:00:00.364683 \n", - " True 00:00:00.401855 \n", - " 100 False 00:00:00.370575 \n", - " True 00:00:00.397365 \n", - "100000 100 1 False 00:00:06.010311 \n", - " True 00:00:06.431314 \n", - " 100 False 00:00:06.186657 \n", - " True 00:00:06.708975 \n", - " 1000 1 False 00:10:13.258062 \n", - " True 00:11:20.907826 \n", - " 100 False 00:10:31.669595 \n", - " True 00:11:31.505501 \n", + "1000 100 1 False 00:00:00.007383 \n", + " True 00:00:00.009028 \n", + " 100 False 00:00:00.007657 \n", + " True 00:00:00.008238 \n", + " 1000 1 False 00:00:00.414364 \n", + " True 00:00:00.473789 \n", + " 100 False 00:00:00.430833 \n", + " True 00:00:00.453477 \n", + "100000 100 1 False 00:00:05.236376 \n", + " True 00:00:05.623463 \n", + " 100 False 00:00:05.083829 \n", + " True 00:00:05.576003 \n", + " 1000 1 False 00:08:59.285347 \n", + " True 00:09:57.693219 \n", + " 100 False 00:09:23.213450 \n", + " True 00:10:10.612458 \n", "\n", " corpus_nonzero \\\n", "dictionary_size corpus_size nonzero_limit normalized \n", - "1000 100 1 False 1.0 \n", - " True 1.0 \n", - " 100 False 1.0 \n", - " True 1.0 \n", - " 1000 1 False 24.0 \n", - " True 24.0 \n", - " 100 False 24.0 \n", - " True 24.0 \n", - "100000 100 1 False 94.0 \n", - " True 94.0 \n", - " 100 False 94.0 \n", - " True 94.0 \n", - " 1000 1 False 931.0 \n", - " True 931.0 \n", - " 100 False 931.0 \n", - " True 931.0 \n", + "1000 100 1 False 3.0 \n", + " True 3.0 \n", + " 100 False 3.0 \n", + " True 3.0 \n", + " 1000 1 False 26.0 \n", + " True 26.0 \n", + " 100 False 26.0 \n", + " True 26.0 \n", + "100000 100 1 False 423.0 \n", + " True 423.0 \n", + " 100 False 423.0 \n", + " True 423.0 \n", + " 1000 1 False 5162.0 \n", + " True 5162.0 \n", + " 100 False 5162.0 \n", + " True 5162.0 \n", "\n", " matrix_nonzero \\\n", "dictionary_size corpus_size nonzero_limit normalized \n", "1000 100 1 False 1000.0 \n", " True 1000.0 \n", - " 100 False 84724.0 \n", - " True 84724.0 \n", + " 100 False 84944.0 \n", + " True 84944.0 \n", " 1000 1 False 1000.0 \n", " True 1000.0 \n", - " 100 False 84724.0 \n", - " True 84724.0 \n", - "100000 100 1 False 102754.0 \n", - " True 102754.0 \n", - 
" 100 False 8134614.0 \n", - " True 8134614.0 \n", - " 1000 1 False 102754.0 \n", - " True 102754.0 \n", - " 100 False 8134614.0 \n", - " True 8134614.0 \n", + " 100 False 84944.0 \n", + " True 84944.0 \n", + "100000 100 1 False 101868.0 \n", + " True 101868.0 \n", + " 100 False 8202884.0 \n", + " True 8202884.0 \n", + " 1000 1 False 101868.0 \n", + " True 101868.0 \n", + " 100 False 8202884.0 \n", + " True 8202884.0 \n", "\n", " speed \n", "dictionary_size corpus_size nonzero_limit normalized \n", - "1000 100 1 False 1.28 Kdoc pairs / s \n", - " True 1.12 Kdoc pairs / s \n", + "1000 100 1 False 1.23 Kdoc pairs / s \n", + " True 1.01 Kdoc pairs / s \n", " 100 False 1.19 Kdoc pairs / s \n", - " True 1.24 Kdoc pairs / s \n", - " 1000 1 False 1.58 Kdoc pairs / s \n", - " True 1.43 Kdoc pairs / s \n", - " 100 False 1.56 Kdoc pairs / s \n", - " True 1.45 Kdoc pairs / s \n", - "100000 100 1 False 1.47 Kdoc pairs / s \n", - " True 1.38 Kdoc pairs / s \n", - " 100 False 1.43 Kdoc pairs / s \n", - " True 1.32 Kdoc pairs / s \n", - " 1000 1 False 1.42 Kdoc pairs / s \n", - " True 1.28 Kdoc pairs / s \n", - " 100 False 1.37 Kdoc pairs / s \n", - " True 1.25 Kdoc pairs / s " + " True 1.10 Kdoc pairs / s \n", + " 1000 1 False 1.39 Kdoc pairs / s \n", + " True 1.22 Kdoc pairs / s \n", + " 100 False 1.35 Kdoc pairs / s \n", + " True 1.27 Kdoc pairs / s \n", + "100000 100 1 False 1.29 Kdoc pairs / s \n", + " True 1.20 Kdoc pairs / s \n", + " 100 False 1.33 Kdoc pairs / s \n", + " True 1.21 Kdoc pairs / s \n", + " 1000 1 False 1.26 Kdoc pairs / s \n", + " True 1.14 Kdoc pairs / s \n", + " 100 False 1.21 Kdoc pairs / s \n", + " True 1.12 Kdoc pairs / s " ] }, "execution_count": 30, @@ -2381,123 +2658,123 @@ " 100\n", " 1\n", " False\n", - " 00:00:00.000266\n", + " 00:00:00.000871\n", " 0.0\n", " 0.0\n", - " 0.33 Kdoc pairs / s\n", + " 0.13 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.000233\n", + " 00:00:00.001315\n", " 0.0\n", " 0.0\n", - " 0.28 Kdoc pairs / s\n", + " 0.14 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:00.000231\n", + " 00:00:00.000893\n", " 0.0\n", " 0.0\n", - " 0.29 Kdoc pairs / s\n", + " 0.12 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.000222\n", + " 00:00:00.000631\n", " 0.0\n", " 0.0\n", - " 0.27 Kdoc pairs / s\n", + " 0.08 Kdoc pairs / s\n", " \n", " \n", " 1000\n", " 1\n", " False\n", - " 00:00:00.012289\n", + " 00:00:00.014460\n", " 0.0\n", " 0.0\n", " 0.05 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.012813\n", + " 00:00:00.025250\n", " 0.0\n", " 0.0\n", - " 0.04 Kdoc pairs / s\n", + " 0.07 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:00.016282\n", + " 00:00:00.039088\n", " 0.0\n", " 0.0\n", - " 0.06 Kdoc pairs / s\n", + " 0.11 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.008923\n", + " 00:00:00.023602\n", " 0.0\n", " 0.0\n", - " 0.03 Kdoc pairs / s\n", + " 0.06 Kdoc pairs / s\n", " \n", " \n", " 100000\n", " 100\n", " 1\n", " False\n", - " 00:00:00.311038\n", + " 00:00:00.276359\n", " 0.0\n", " 0.0\n", " 0.07 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.199735\n", + " 00:00:00.278806\n", " 0.0\n", " 0.0\n", - " 0.04 Kdoc pairs / s\n", + " 0.06 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:00.371861\n", + " 00:00:00.286781\n", " 0.0\n", " 0.0\n", - " 0.08 Kdoc pairs / s\n", + " 0.07 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.419363\n", + " 00:00:00.313397\n", " 0.0\n", " 0.0\n", - " 0.08 Kdoc pairs / s\n", + " 0.06 Kdoc pairs / s\n", " \n", " \n", " 
1000\n", " 1\n", " False\n", - " 00:00:26.375716\n", + " 00:00:14.321101\n", " 0.0\n", " 0.0\n", - " 0.06 Kdoc pairs / s\n", + " 0.03 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:37.038096\n", + " 00:00:23.526104\n", " 0.0\n", " 0.0\n", - " 0.07 Kdoc pairs / s\n", + " 0.05 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:27.173464\n", + " 00:00:05.899527\n", " 0.0\n", " 0.0\n", - " 0.05 Kdoc pairs / s\n", + " 0.01 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:25.752512\n", + " 00:00:24.454422\n", " 0.0\n", " 0.0\n", " 0.05 Kdoc pairs / s\n", @@ -2509,22 +2786,22 @@ "text/plain": [ " duration \\\n", "dictionary_size corpus_size nonzero_limit normalized \n", - "1000 100 1 False 00:00:00.000266 \n", - " True 00:00:00.000233 \n", - " 100 False 00:00:00.000231 \n", - " True 00:00:00.000222 \n", - " 1000 1 False 00:00:00.012289 \n", - " True 00:00:00.012813 \n", - " 100 False 00:00:00.016282 \n", - " True 00:00:00.008923 \n", - "100000 100 1 False 00:00:00.311038 \n", - " True 00:00:00.199735 \n", - " 100 False 00:00:00.371861 \n", - " True 00:00:00.419363 \n", - " 1000 1 False 00:00:26.375716 \n", - " True 00:00:37.038096 \n", - " 100 False 00:00:27.173464 \n", - " True 00:00:25.752512 \n", + "1000 100 1 False 00:00:00.000871 \n", + " True 00:00:00.001315 \n", + " 100 False 00:00:00.000893 \n", + " True 00:00:00.000631 \n", + " 1000 1 False 00:00:00.014460 \n", + " True 00:00:00.025250 \n", + " 100 False 00:00:00.039088 \n", + " True 00:00:00.023602 \n", + "100000 100 1 False 00:00:00.276359 \n", + " True 00:00:00.278806 \n", + " 100 False 00:00:00.286781 \n", + " True 00:00:00.313397 \n", + " 1000 1 False 00:00:14.321101 \n", + " True 00:00:23.526104 \n", + " 100 False 00:00:05.899527 \n", + " True 00:00:24.454422 \n", "\n", " corpus_nonzero \\\n", "dictionary_size corpus_size nonzero_limit normalized \n", @@ -2566,21 +2843,21 @@ "\n", " speed \n", "dictionary_size corpus_size nonzero_limit normalized \n", - "1000 100 1 False 0.33 Kdoc pairs / s \n", - " True 0.28 Kdoc pairs / s \n", - " 100 False 0.29 Kdoc pairs / s \n", - " True 0.27 Kdoc pairs / s \n", - " 1000 1 False 0.05 Kdoc pairs / s \n", - " True 0.04 Kdoc pairs / s \n", - " 100 False 0.06 Kdoc pairs / s \n", - " True 0.03 Kdoc pairs / s \n", - "100000 100 1 False 0.07 Kdoc pairs / s \n", - " True 0.04 Kdoc pairs / s \n", - " 100 False 0.08 Kdoc pairs / s \n", + "1000 100 1 False 0.13 Kdoc pairs / s \n", + " True 0.14 Kdoc pairs / s \n", + " 100 False 0.12 Kdoc pairs / s \n", " True 0.08 Kdoc pairs / s \n", - " 1000 1 False 0.06 Kdoc pairs / s \n", + " 1000 1 False 0.05 Kdoc pairs / s \n", " True 0.07 Kdoc pairs / s \n", - " 100 False 0.05 Kdoc pairs / s \n", + " 100 False 0.11 Kdoc pairs / s \n", + " True 0.06 Kdoc pairs / s \n", + "100000 100 1 False 0.07 Kdoc pairs / s \n", + " True 0.06 Kdoc pairs / s \n", + " 100 False 0.07 Kdoc pairs / s \n", + " True 0.06 Kdoc pairs / s \n", + " 1000 1 False 0.03 Kdoc pairs / s \n", + " True 0.05 Kdoc pairs / s \n", + " 100 False 0.01 Kdoc pairs / s \n", " True 0.05 Kdoc pairs / s " ] }, @@ -2626,7 +2903,8 @@ " \"nonzero_limit\": nonzero_limit,\n", " \"normalized\": normalized,\n", " \"corpus_size\": corpus_size,\n", - " \"corpus_nonzero\": len(corpus),\n", + " \"corpus_actual_size\": len(corpus),\n", + " \"corpus_nonzero\": sum(len(vec) for vec in corpus),\n", " \"mean_document_length\": np.mean([len(doc) for doc in corpus]),\n", " \"repetition\": repetition,\n", " \"duration\": duration, }" @@ -2656,7 +2934,8 @@ "outputs": [], "source": [ "df = 
pd.DataFrame(results)\n", - "df[\"speed\"] = df.corpus_nonzero**2 / df.duration\n", + "df[\"speed\"] = df.corpus_actual_size**2 / df.duration\n", + "del df[\"corpus_actual_size\"]\n", "df = df.groupby([\"dictionary_size\", \"corpus_size\", \"nonzero_limit\", \"normalized\"])\n", "\n", "def display(df):\n", @@ -2716,126 +2995,126 @@ " 100\n", " 1\n", " False\n", - " 00:00:00.026415\n", - " 100.0\n", + " 00:00:00.009363\n", + " 3.0\n", " 1000.0\n", - " 390.39 Kdoc pairs / s\n", + " 1117.12 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.031172\n", - " 100.0\n", + " 00:00:00.010948\n", + " 3.0\n", " 1000.0\n", - " 338.82 Kdoc pairs / s\n", + " 954.13 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:00.049967\n", - " 100.0\n", - " 83340.0\n", - " 200.15 Kdoc pairs / s\n", + " 00:00:00.014128\n", + " 3.0\n", + " 84944.0\n", + " 728.91 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.047319\n", - " 100.0\n", - " 83340.0\n", - " 216.95 Kdoc pairs / s\n", + " 00:00:00.018164\n", + " 3.0\n", + " 84944.0\n", + " 551.78 Kdoc pairs / s\n", " \n", " \n", " 1000\n", " 1\n", " False\n", - " 00:00:00.283383\n", - " 1000.0\n", + " 00:00:00.072091\n", + " 26.0\n", " 1000.0\n", - " 3532.15 Kdoc pairs / s\n", + " 13872.12 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.304221\n", - " 1000.0\n", + " 00:00:00.079284\n", + " 26.0\n", " 1000.0\n", - " 3292.88 Kdoc pairs / s\n", + " 12615.36 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:00.623971\n", - " 1000.0\n", - " 83340.0\n", - " 1603.08 Kdoc pairs / s\n", + " 00:00:00.162483\n", + " 26.0\n", + " 84944.0\n", + " 6188.43 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.738579\n", - " 1000.0\n", - " 83340.0\n", - " 1354.27 Kdoc pairs / s\n", + " 00:00:00.203081\n", + " 26.0\n", + " 84944.0\n", + " 4924.48 Kdoc pairs / s\n", " \n", " \n", " 100000\n", " 100\n", " 1\n", " False\n", - " 00:00:00.418272\n", - " 100.0\n", - " 102420.0\n", - " 23.93 Kdoc pairs / s\n", + " 00:00:00.278253\n", + " 423.0\n", + " 101868.0\n", + " 36.05 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.432476\n", - " 100.0\n", - " 102420.0\n", - " 23.15 Kdoc pairs / s\n", + " 00:00:00.298519\n", + " 423.0\n", + " 101868.0\n", + " 33.56 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:39.347374\n", - " 100.0\n", - " 8288496.0\n", - " 0.26 Kdoc pairs / s\n", + " 00:00:36.326167\n", + " 423.0\n", + " 8202884.0\n", + " 0.28 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:37.148214\n", - " 100.0\n", - " 8288496.0\n", + " 00:00:36.928802\n", + " 423.0\n", + " 8202884.0\n", " 0.27 Kdoc pairs / s\n", " \n", " \n", " 1000\n", " 1\n", " False\n", - " 00:00:11.689220\n", - " 1000.0\n", - " 102420.0\n", - " 85.55 Kdoc pairs / s\n", + " 00:00:07.403301\n", + " 5162.0\n", + " 101868.0\n", + " 135.08 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:12.112989\n", - " 1000.0\n", - " 102420.0\n", - " 82.60 Kdoc pairs / s\n", + " 00:00:07.794943\n", + " 5162.0\n", + " 101868.0\n", + " 128.29 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:06:26.612026\n", - " 1000.0\n", - " 8288496.0\n", - " 2.61 Kdoc pairs / s\n", + " 00:05:55.674712\n", + " 5162.0\n", + " 8202884.0\n", + " 2.81 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:06:45.335668\n", - " 1000.0\n", - " 8288496.0\n", - " 2.50 Kdoc pairs / s\n", + " 00:06:05.561398\n", + " 5162.0\n", + " 8202884.0\n", + " 2.74 Kdoc pairs / s\n", " \n", " \n", "\n", @@ -2844,79 +3123,79 @@ "text/plain": [ " duration \\\n", "dictionary_size 
corpus_size nonzero_limit normalized \n", - "1000 100 1 False 00:00:00.026415 \n", - " True 00:00:00.031172 \n", - " 100 False 00:00:00.049967 \n", - " True 00:00:00.047319 \n", - " 1000 1 False 00:00:00.283383 \n", - " True 00:00:00.304221 \n", - " 100 False 00:00:00.623971 \n", - " True 00:00:00.738579 \n", - "100000 100 1 False 00:00:00.418272 \n", - " True 00:00:00.432476 \n", - " 100 False 00:00:39.347374 \n", - " True 00:00:37.148214 \n", - " 1000 1 False 00:00:11.689220 \n", - " True 00:00:12.112989 \n", - " 100 False 00:06:26.612026 \n", - " True 00:06:45.335668 \n", + "1000 100 1 False 00:00:00.009363 \n", + " True 00:00:00.010948 \n", + " 100 False 00:00:00.014128 \n", + " True 00:00:00.018164 \n", + " 1000 1 False 00:00:00.072091 \n", + " True 00:00:00.079284 \n", + " 100 False 00:00:00.162483 \n", + " True 00:00:00.203081 \n", + "100000 100 1 False 00:00:00.278253 \n", + " True 00:00:00.298519 \n", + " 100 False 00:00:36.326167 \n", + " True 00:00:36.928802 \n", + " 1000 1 False 00:00:07.403301 \n", + " True 00:00:07.794943 \n", + " 100 False 00:05:55.674712 \n", + " True 00:06:05.561398 \n", "\n", " corpus_nonzero \\\n", "dictionary_size corpus_size nonzero_limit normalized \n", - "1000 100 1 False 100.0 \n", - " True 100.0 \n", - " 100 False 100.0 \n", - " True 100.0 \n", - " 1000 1 False 1000.0 \n", - " True 1000.0 \n", - " 100 False 1000.0 \n", - " True 1000.0 \n", - "100000 100 1 False 100.0 \n", - " True 100.0 \n", - " 100 False 100.0 \n", - " True 100.0 \n", - " 1000 1 False 1000.0 \n", - " True 1000.0 \n", - " 100 False 1000.0 \n", - " True 1000.0 \n", + "1000 100 1 False 3.0 \n", + " True 3.0 \n", + " 100 False 3.0 \n", + " True 3.0 \n", + " 1000 1 False 26.0 \n", + " True 26.0 \n", + " 100 False 26.0 \n", + " True 26.0 \n", + "100000 100 1 False 423.0 \n", + " True 423.0 \n", + " 100 False 423.0 \n", + " True 423.0 \n", + " 1000 1 False 5162.0 \n", + " True 5162.0 \n", + " 100 False 5162.0 \n", + " True 5162.0 \n", "\n", " matrix_nonzero \\\n", "dictionary_size corpus_size nonzero_limit normalized \n", "1000 100 1 False 1000.0 \n", " True 1000.0 \n", - " 100 False 83340.0 \n", - " True 83340.0 \n", + " 100 False 84944.0 \n", + " True 84944.0 \n", " 1000 1 False 1000.0 \n", " True 1000.0 \n", - " 100 False 83340.0 \n", - " True 83340.0 \n", - "100000 100 1 False 102420.0 \n", - " True 102420.0 \n", - " 100 False 8288496.0 \n", - " True 8288496.0 \n", - " 1000 1 False 102420.0 \n", - " True 102420.0 \n", - " 100 False 8288496.0 \n", - " True 8288496.0 \n", + " 100 False 84944.0 \n", + " True 84944.0 \n", + "100000 100 1 False 101868.0 \n", + " True 101868.0 \n", + " 100 False 8202884.0 \n", + " True 8202884.0 \n", + " 1000 1 False 101868.0 \n", + " True 101868.0 \n", + " 100 False 8202884.0 \n", + " True 8202884.0 \n", "\n", - " speed \n", - "dictionary_size corpus_size nonzero_limit normalized \n", - "1000 100 1 False 390.39 Kdoc pairs / s \n", - " True 338.82 Kdoc pairs / s \n", - " 100 False 200.15 Kdoc pairs / s \n", - " True 216.95 Kdoc pairs / s \n", - " 1000 1 False 3532.15 Kdoc pairs / s \n", - " True 3292.88 Kdoc pairs / s \n", - " 100 False 1603.08 Kdoc pairs / s \n", - " True 1354.27 Kdoc pairs / s \n", - "100000 100 1 False 23.93 Kdoc pairs / s \n", - " True 23.15 Kdoc pairs / s \n", - " 100 False 0.26 Kdoc pairs / s \n", - " True 0.27 Kdoc pairs / s \n", - " 1000 1 False 85.55 Kdoc pairs / s \n", - " True 82.60 Kdoc pairs / s \n", - " 100 False 2.61 Kdoc pairs / s \n", - " True 2.50 Kdoc pairs / s " + " speed \n", + "dictionary_size corpus_size 
nonzero_limit normalized \n", + "1000 100 1 False 1117.12 Kdoc pairs / s \n", + " True 954.13 Kdoc pairs / s \n", + " 100 False 728.91 Kdoc pairs / s \n", + " True 551.78 Kdoc pairs / s \n", + " 1000 1 False 13872.12 Kdoc pairs / s \n", + " True 12615.36 Kdoc pairs / s \n", + " 100 False 6188.43 Kdoc pairs / s \n", + " True 4924.48 Kdoc pairs / s \n", + "100000 100 1 False 36.05 Kdoc pairs / s \n", + " True 33.56 Kdoc pairs / s \n", + " 100 False 0.28 Kdoc pairs / s \n", + " True 0.27 Kdoc pairs / s \n", + " 1000 1 False 135.08 Kdoc pairs / s \n", + " True 128.29 Kdoc pairs / s \n", + " 100 False 2.81 Kdoc pairs / s \n", + " True 2.74 Kdoc pairs / s " ] }, "execution_count": 35, @@ -2981,126 +3260,126 @@ " 100\n", " 1\n", " False\n", - " 00:00:00.005496\n", + " 00:00:00.002120\n", " 0.0\n", " 0.0\n", - " 63.28 Kdoc pairs / s\n", + " 242.09 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.007937\n", + " 00:00:00.002387\n", " 0.0\n", " 0.0\n", - " 78.66 Kdoc pairs / s\n", + " 207.64 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:00.000453\n", + " 00:00:00.002531\n", " 0.0\n", " 0.0\n", - " 1.82 Kdoc pairs / s\n", + " 130.94 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.007808\n", + " 00:00:00.000911\n", " 0.0\n", " 0.0\n", - " 37.91 Kdoc pairs / s\n", + " 27.68 Kdoc pairs / s\n", " \n", " \n", " 1000\n", " 1\n", " False\n", - " 00:00:00.009584\n", + " 00:00:00.000587\n", " 0.0\n", " 0.0\n", - " 110.32 Kdoc pairs / s\n", + " 112.92 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.014151\n", + " 00:00:00.001191\n", " 0.0\n", " 0.0\n", - " 138.65 Kdoc pairs / s\n", + " 187.31 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:00.011108\n", + " 00:00:00.011944\n", " 0.0\n", " 0.0\n", - " 27.67 Kdoc pairs / s\n", + " 513.79 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.011940\n", + " 00:00:00.001793\n", " 0.0\n", " 0.0\n", - " 21.60 Kdoc pairs / s\n", + " 43.54 Kdoc pairs / s\n", " \n", " \n", " 100000\n", " 100\n", " 1\n", " False\n", - " 00:00:00.014609\n", + " 00:00:00.016156\n", " 0.0\n", " 0.0\n", - " 0.86 Kdoc pairs / s\n", + " 2.06 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.015215\n", + " 00:00:00.013451\n", " 0.0\n", " 0.0\n", - " 0.82 Kdoc pairs / s\n", + " 1.47 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:04.042557\n", + " 00:00:01.339787\n", " 0.0\n", " 0.0\n", - " 0.03 Kdoc pairs / s\n", + " 0.01 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:02.724963\n", + " 00:00:01.617340\n", " 0.0\n", " 0.0\n", - " 0.02 Kdoc pairs / s\n", + " 0.01 Kdoc pairs / s\n", " \n", " \n", " 1000\n", " 1\n", " False\n", - " 00:00:00.087808\n", + " 00:00:00.038961\n", " 0.0\n", " 0.0\n", - " 0.64 Kdoc pairs / s\n", + " 0.71 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.318628\n", + " 00:00:00.024154\n", " 0.0\n", " 0.0\n", - " 2.04 Kdoc pairs / s\n", + " 0.40 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:37.485753\n", + " 00:00:07.604805\n", " 0.0\n", " 0.0\n", - " 0.24 Kdoc pairs / s\n", + " 0.06 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:48.370403\n", + " 00:00:14.799519\n", " 0.0\n", " 0.0\n", - " 0.27 Kdoc pairs / s\n", + " 0.10 Kdoc pairs / s\n", " \n", " \n", "\n", @@ -3109,22 +3388,22 @@ "text/plain": [ " duration \\\n", "dictionary_size corpus_size nonzero_limit normalized \n", - "1000 100 1 False 00:00:00.005496 \n", - " True 00:00:00.007937 \n", - " 100 False 00:00:00.000453 \n", - " True 00:00:00.007808 \n", - " 1000 1 False 00:00:00.009584 \n", - 
" True 00:00:00.014151 \n", - " 100 False 00:00:00.011108 \n", - " True 00:00:00.011940 \n", - "100000 100 1 False 00:00:00.014609 \n", - " True 00:00:00.015215 \n", - " 100 False 00:00:04.042557 \n", - " True 00:00:02.724963 \n", - " 1000 1 False 00:00:00.087808 \n", - " True 00:00:00.318628 \n", - " 100 False 00:00:37.485753 \n", - " True 00:00:48.370403 \n", + "1000 100 1 False 00:00:00.002120 \n", + " True 00:00:00.002387 \n", + " 100 False 00:00:00.002531 \n", + " True 00:00:00.000911 \n", + " 1000 1 False 00:00:00.000587 \n", + " True 00:00:00.001191 \n", + " 100 False 00:00:00.011944 \n", + " True 00:00:00.001793 \n", + "100000 100 1 False 00:00:00.016156 \n", + " True 00:00:00.013451 \n", + " 100 False 00:00:01.339787 \n", + " True 00:00:01.617340 \n", + " 1000 1 False 00:00:00.038961 \n", + " True 00:00:00.024154 \n", + " 100 False 00:00:07.604805 \n", + " True 00:00:14.799519 \n", "\n", " corpus_nonzero \\\n", "dictionary_size corpus_size nonzero_limit normalized \n", @@ -3166,22 +3445,22 @@ "\n", " speed \n", "dictionary_size corpus_size nonzero_limit normalized \n", - "1000 100 1 False 63.28 Kdoc pairs / s \n", - " True 78.66 Kdoc pairs / s \n", - " 100 False 1.82 Kdoc pairs / s \n", - " True 37.91 Kdoc pairs / s \n", - " 1000 1 False 110.32 Kdoc pairs / s \n", - " True 138.65 Kdoc pairs / s \n", - " 100 False 27.67 Kdoc pairs / s \n", - " True 21.60 Kdoc pairs / s \n", - "100000 100 1 False 0.86 Kdoc pairs / s \n", - " True 0.82 Kdoc pairs / s \n", - " 100 False 0.03 Kdoc pairs / s \n", - " True 0.02 Kdoc pairs / s \n", - " 1000 1 False 0.64 Kdoc pairs / s \n", - " True 2.04 Kdoc pairs / s \n", - " 100 False 0.24 Kdoc pairs / s \n", - " True 0.27 Kdoc pairs / s " + "1000 100 1 False 242.09 Kdoc pairs / s \n", + " True 207.64 Kdoc pairs / s \n", + " 100 False 130.94 Kdoc pairs / s \n", + " True 27.68 Kdoc pairs / s \n", + " 1000 1 False 112.92 Kdoc pairs / s \n", + " True 187.31 Kdoc pairs / s \n", + " 100 False 513.79 Kdoc pairs / s \n", + " True 43.54 Kdoc pairs / s \n", + "100000 100 1 False 2.06 Kdoc pairs / s \n", + " True 1.47 Kdoc pairs / s \n", + " 100 False 0.01 Kdoc pairs / s \n", + " True 0.01 Kdoc pairs / s \n", + " 1000 1 False 0.71 Kdoc pairs / s \n", + " True 0.40 Kdoc pairs / s \n", + " 100 False 0.06 Kdoc pairs / s \n", + " True 0.10 Kdoc pairs / s " ] }, "execution_count": 36, @@ -3226,7 +3505,8 @@ " \"nonzero_limit\": nonzero_limit,\n", " \"normalized\": normalized,\n", " \"corpus_size\": corpus_size,\n", - " \"corpus_nonzero\": len(corpus),\n", + " \"corpus_actual_size\": len(corpus),\n", + " \"corpus_nonzero\": sum(len(vec) for vec in corpus),\n", " \"mean_document_length\": np.mean([len(doc) for doc in corpus]),\n", " \"repetition\": repetition,\n", " \"duration\": duration, }" @@ -3236,29 +3516,68 @@ "cell_type": "code", "execution_count": 38, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "84e1344be5d944fa98368e6b3994944a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=2), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/mnt/storage/home/novotny/.virtualenvs/gensim/lib/python3.4/site-packages/gensim/matutils.py:738: FutureWarning: Conversion of the second argument of issubdtype from `int` to `np.signedinteger` is deprecated. 
In future, it will be treated as `np.int64 == np.dtype(int).type`.\n", + " if np.issubdtype(vec.dtype, np.int):\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], "source": [ - "corpora.append(full_corpus)\n", - "\n", - "configurations = product(matrices, corpora, normalization, repetitions)\n", - "results = benchmark_results(benchmark, configurations, \"matrix_speed.inner-product_results.corpus_corpus\")" + "nonzero_limits = [1000]\n", + "dense_matrices = []\n", + "for (model, dictionary), nonzero_limit in tqdm(\n", + " list(product(zip(models, dictionaries), nonzero_limits)), desc=\"matrices\"):\n", + " annoy = AnnoyIndexer(model, 1)\n", + " index = WordEmbeddingSimilarityIndex(model, kwargs={\"indexer\": annoy})\n", + " matrix = SparseTermSimilarityMatrix(index, dictionary, nonzero_limit=nonzero_limit)\n", + " matrices.append((matrix, dictionary, nonzero_limit))\n", + " del annoy" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 39, "metadata": {}, + "outputs": [], "source": [ - "The **speed** is inversely proportional to **matrix_nonzero**. Computing a normalized inner product (**normalized**${}={}$True) results in a constant speed decrease." + "configurations = product(matrices + dense_matrices, corpora + [full_corpus], normalization, repetitions)\n", + "results = benchmark_results(benchmark, configurations, \"matrix_speed.inner-product_results.corpus_corpus\")" ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "df = pd.DataFrame(results)\n", - "df[\"speed\"] = df.corpus_nonzero**2 / df.duration\n", + "df[\"speed\"] = df.corpus_actual_size**2 / df.duration\n", + "del df[\"corpus_actual_size\"]\n", "df = df.groupby([\"dictionary_size\", \"corpus_size\", \"nonzero_limit\", \"normalized\"])\n", "\n", "def display(df):\n", @@ -3269,7 +3588,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 41, "metadata": {}, "outputs": [ { @@ -3314,192 +3633,372 @@ " \n", " \n", " \n", - " 1000\n", - " 100\n", + " 1000\n", + " 100\n", " 1\n", " False\n", - " 00:00:00.001417\n", + " 00:00:00.001403\n", " 3.0\n", " 1000.0\n", - " 6.63 Kdoc pairs / s\n", + " 6.69 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.004412\n", + " 00:00:00.005313\n", " 3.0\n", " 1000.0\n", - " 2.05 Kdoc pairs / s\n", + " 1.70 Kdoc pairs / s\n", + " \n", + " \n", + " 10\n", + " False\n", + " 00:00:00.001565\n", + " 3.0\n", + " 8634.0\n", + " 5.80 Kdoc pairs / s\n", + " \n", + " \n", + " True\n", + " 00:00:00.005307\n", + " 3.0\n", + " 8634.0\n", + " 1.70 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:00.002731\n", + " 00:00:00.003172\n", " 3.0\n", - " 83350.0\n", - " 3.31 Kdoc pairs / s\n", + " 84944.0\n", + " 3.05 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.006891\n", + " 00:00:00.008461\n", " 3.0\n", - " 83350.0\n", - " 1.31 Kdoc pairs / s\n", + " 84944.0\n", + " 1.07 Kdoc pairs / s\n", " \n", " \n", - " 1000\n", + " 1000\n", + " False\n", + " 00:00:00.021377\n", + " 3.0\n", + " 838588.0\n", + " 0.42 Kdoc pairs / s\n", + " \n", + " \n", + " True\n", + " 00:00:00.055234\n", + " 3.0\n", + " 838588.0\n", + " 0.16 Kdoc pairs / s\n", + " \n", + " \n", + " 1000\n", " 1\n", " False\n", - " 00:00:00.001284\n", - " 31.0\n", + " 00:00:00.001376\n", + " 26.0\n", " 1000.0\n", - " 749.77 Kdoc pairs / s\n", + " 418.61 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.004089\n", - " 31.0\n", + " 00:00:00.005019\n", + " 26.0\n", " 
1000.0\n", - " 235.11 Kdoc pairs / s\n", + " 114.78 Kdoc pairs / s\n", + " \n", + " \n", + " 10\n", + " False\n", + " 00:00:00.001511\n", + " 26.0\n", + " 8634.0\n", + " 381.50 Kdoc pairs / s\n", + " \n", + " \n", + " True\n", + " 00:00:00.005208\n", + " 26.0\n", + " 8634.0\n", + " 110.60 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:00.002716\n", - " 31.0\n", - " 83350.0\n", - " 354.93 Kdoc pairs / s\n", + " 00:00:00.003539\n", + " 26.0\n", + " 84944.0\n", + " 164.03 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.006884\n", - " 31.0\n", - " 83350.0\n", - " 139.65 Kdoc pairs / s\n", + " 00:00:00.008502\n", + " 26.0\n", + " 84944.0\n", + " 67.81 Kdoc pairs / s\n", " \n", " \n", - " 100000\n", + " 1000\n", + " False\n", + " 00:00:00.021548\n", + " 26.0\n", + " 838588.0\n", + " 26.73 Kdoc pairs / s\n", + " \n", + " \n", + " True\n", + " 00:00:00.054425\n", + " 26.0\n", + " 838588.0\n", + " 10.59 Kdoc pairs / s\n", + " \n", + " \n", + " 100000\n", " 1\n", " False\n", - " 00:00:00.020794\n", - " 2876.0\n", + " 00:00:00.019915\n", + " 2914.0\n", " 1000.0\n", - " 398435.09 Kdoc pairs / s\n", + " 391443.20 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.026243\n", - " 2876.0\n", + " 00:00:00.026118\n", + " 2914.0\n", " 1000.0\n", - " 315390.91 Kdoc pairs / s\n", + " 298377.75 Kdoc pairs / s\n", + " \n", + " \n", + " 10\n", + " False\n", + " 00:00:00.020152\n", + " 2914.0\n", + " 8634.0\n", + " 386722.55 Kdoc pairs / s\n", + " \n", + " \n", + " True\n", + " 00:00:00.026998\n", + " 2914.0\n", + " 8634.0\n", + " 288567.14 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:00.033319\n", - " 2876.0\n", - " 83350.0\n", - " 249153.21 Kdoc pairs / s\n", + " 00:00:00.028345\n", + " 2914.0\n", + " 84944.0\n", + " 274905.36 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.047398\n", - " 2876.0\n", - " 83350.0\n", - " 175611.68 Kdoc pairs / s\n", + " 00:00:00.041069\n", + " 2914.0\n", + " 84944.0\n", + " 189709.57 Kdoc pairs / s\n", " \n", " \n", - " 100000\n", - " 100\n", + " 1000\n", + " False\n", + " 00:00:00.089978\n", + " 2914.0\n", + " 838588.0\n", + " 86598.15 Kdoc pairs / s\n", + " \n", + " \n", + " True\n", + " 00:00:00.185611\n", + " 2914.0\n", + " 838588.0\n", + " 41971.58 Kdoc pairs / s\n", + " \n", + " \n", + " 100000\n", + " 100\n", " 1\n", " False\n", - " 00:00:00.002843\n", - " 78.0\n", - " 101704.0\n", - " 2159.90 Kdoc pairs / s\n", + " 00:00:00.003345\n", + " 423.0\n", + " 101868.0\n", + " 2013.92 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.007447\n", - " 78.0\n", - " 101704.0\n", - " 818.63 Kdoc pairs / s\n", + " 00:00:00.008857\n", + " 423.0\n", + " 101868.0\n", + " 760.13 Kdoc pairs / s\n", + " \n", + " \n", + " 10\n", + " False\n", + " 00:00:00.032639\n", + " 423.0\n", + " 814154.0\n", + " 206.66 Kdoc pairs / s\n", + " \n", + " \n", + " True\n", + " 00:00:00.080591\n", + " 423.0\n", + " 814154.0\n", + " 83.46 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:00.371558\n", - " 78.0\n", - " 8092140.0\n", - " 16.83 Kdoc pairs / s\n", + " 00:00:00.488467\n", + " 423.0\n", + " 8202884.0\n", + " 13.77 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:01.055354\n", - " 78.0\n", - " 8092140.0\n", - " 5.78 Kdoc pairs / s\n", + " 00:00:01.454507\n", + " 423.0\n", + " 8202884.0\n", + " 4.62 Kdoc pairs / s\n", " \n", " \n", - " 1000\n", + " 1000\n", + " False\n", + " 00:00:04.973667\n", + " 423.0\n", + " 89912542.0\n", + " 1.35 Kdoc pairs / s\n", + " \n", + " \n", + " True\n", + " 00:00:15.035711\n", + " 
423.0\n", + " 89912542.0\n", + " 0.45 Kdoc pairs / s\n", + " \n", + " \n", + " 1000\n", " 1\n", " False\n", - " 00:00:00.010172\n", - " 828.0\n", - " 101704.0\n", - " 67407.45 Kdoc pairs / s\n", + " 00:00:00.010141\n", + " 5162.0\n", + " 101868.0\n", + " 67139.73 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.015548\n", - " 828.0\n", - " 101704.0\n", - " 44103.12 Kdoc pairs / s\n", + " 00:00:00.016685\n", + " 5162.0\n", + " 101868.0\n", + " 40798.02 Kdoc pairs / s\n", + " \n", + " \n", + " 10\n", + " False\n", + " 00:00:00.041392\n", + " 5162.0\n", + " 814154.0\n", + " 16444.18 Kdoc pairs / s\n", + " \n", + " \n", + " True\n", + " 00:00:00.091686\n", + " 5162.0\n", + " 814154.0\n", + " 7425.08 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:00.360084\n", - " 828.0\n", - " 8092140.0\n", - " 1905.11 Kdoc pairs / s\n", + " 00:00:00.508916\n", + " 5162.0\n", + " 8202884.0\n", + " 1338.94 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:01.114117\n", - " 828.0\n", - " 8092140.0\n", - " 618.77 Kdoc pairs / s\n", + " 00:00:01.497556\n", + " 5162.0\n", + " 8202884.0\n", + " 454.49 Kdoc pairs / s\n", " \n", " \n", - " 100000\n", + " 1000\n", + " False\n", + " 00:00:05.101489\n", + " 5162.0\n", + " 89912542.0\n", + " 133.44 Kdoc pairs / s\n", + " \n", + " \n", + " True\n", + " 00:00:15.325415\n", + " 5162.0\n", + " 89912542.0\n", + " 44.42 Kdoc pairs / s\n", + " \n", + " \n", + " 100000\n", " 1\n", " False\n", - " 00:00:33.289201\n", - " 84469.0\n", - " 101704.0\n", - " 214349.36 Kdoc pairs / s\n", + " 00:00:37.145526\n", + " 525310.0\n", + " 101868.0\n", + " 192578.80 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:36.110679\n", - " 84469.0\n", - " 101704.0\n", - " 197587.81 Kdoc pairs / s\n", + " 00:00:45.729004\n", + " 525310.0\n", + " 101868.0\n", + " 156431.36 Kdoc pairs / s\n", + " \n", + " \n", + " 10\n", + " False\n", + " 00:00:44.981806\n", + " 525310.0\n", + " 814154.0\n", + " 159029.88 Kdoc pairs / s\n", + " \n", + " \n", + " True\n", + " 00:00:54.245450\n", + " 525310.0\n", + " 814154.0\n", + " 131871.88 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:46.041435\n", - " 84469.0\n", - " 8092140.0\n", - " 154975.96 Kdoc pairs / s\n", + " 00:01:15.925860\n", + " 525310.0\n", + " 8202884.0\n", + " 94216.21 Kdoc pairs / s\n", + " \n", + " \n", + " True\n", + " 00:01:29.232076\n", + " 525310.0\n", + " 8202884.0\n", + " 80177.08 Kdoc pairs / s\n", + " \n", + " \n", + " 1000\n", + " False\n", + " 00:03:17.140191\n", + " 525310.0\n", + " 89912542.0\n", + " 36286.25 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:52.112290\n", - " 84469.0\n", - " 8092140.0\n", - " 136928.74 Kdoc pairs / s\n", + " 00:04:05.865666\n", + " 525310.0\n", + " 89912542.0\n", + " 29097.14 Kdoc pairs / s\n", " \n", " \n", "\n", @@ -3508,127 +4007,223 @@ "text/plain": [ " duration \\\n", "dictionary_size corpus_size nonzero_limit normalized \n", - "1000 100 1 False 00:00:00.001417 \n", - " True 00:00:00.004412 \n", - " 100 False 00:00:00.002731 \n", - " True 00:00:00.006891 \n", - " 1000 1 False 00:00:00.001284 \n", - " True 00:00:00.004089 \n", - " 100 False 00:00:00.002716 \n", - " True 00:00:00.006884 \n", - " 100000 1 False 00:00:00.020794 \n", - " True 00:00:00.026243 \n", - " 100 False 00:00:00.033319 \n", - " True 00:00:00.047398 \n", - "100000 100 1 False 00:00:00.002843 \n", - " True 00:00:00.007447 \n", - " 100 False 00:00:00.371558 \n", - " True 00:00:01.055354 \n", - " 1000 1 False 00:00:00.010172 \n", - " True 00:00:00.015548 \n", - " 100 False 
00:00:00.360084 \n", - " True 00:00:01.114117 \n", - " 100000 1 False 00:00:33.289201 \n", - " True 00:00:36.110679 \n", - " 100 False 00:00:46.041435 \n", - " True 00:00:52.112290 \n", + "1000 100 1 False 00:00:00.001403 \n", + " True 00:00:00.005313 \n", + " 10 False 00:00:00.001565 \n", + " True 00:00:00.005307 \n", + " 100 False 00:00:00.003172 \n", + " True 00:00:00.008461 \n", + " 1000 False 00:00:00.021377 \n", + " True 00:00:00.055234 \n", + " 1000 1 False 00:00:00.001376 \n", + " True 00:00:00.005019 \n", + " 10 False 00:00:00.001511 \n", + " True 00:00:00.005208 \n", + " 100 False 00:00:00.003539 \n", + " True 00:00:00.008502 \n", + " 1000 False 00:00:00.021548 \n", + " True 00:00:00.054425 \n", + " 100000 1 False 00:00:00.019915 \n", + " True 00:00:00.026118 \n", + " 10 False 00:00:00.020152 \n", + " True 00:00:00.026998 \n", + " 100 False 00:00:00.028345 \n", + " True 00:00:00.041069 \n", + " 1000 False 00:00:00.089978 \n", + " True 00:00:00.185611 \n", + "100000 100 1 False 00:00:00.003345 \n", + " True 00:00:00.008857 \n", + " 10 False 00:00:00.032639 \n", + " True 00:00:00.080591 \n", + " 100 False 00:00:00.488467 \n", + " True 00:00:01.454507 \n", + " 1000 False 00:00:04.973667 \n", + " True 00:00:15.035711 \n", + " 1000 1 False 00:00:00.010141 \n", + " True 00:00:00.016685 \n", + " 10 False 00:00:00.041392 \n", + " True 00:00:00.091686 \n", + " 100 False 00:00:00.508916 \n", + " True 00:00:01.497556 \n", + " 1000 False 00:00:05.101489 \n", + " True 00:00:15.325415 \n", + " 100000 1 False 00:00:37.145526 \n", + " True 00:00:45.729004 \n", + " 10 False 00:00:44.981806 \n", + " True 00:00:54.245450 \n", + " 100 False 00:01:15.925860 \n", + " True 00:01:29.232076 \n", + " 1000 False 00:03:17.140191 \n", + " True 00:04:05.865666 \n", "\n", " corpus_nonzero \\\n", "dictionary_size corpus_size nonzero_limit normalized \n", "1000 100 1 False 3.0 \n", " True 3.0 \n", + " 10 False 3.0 \n", + " True 3.0 \n", " 100 False 3.0 \n", " True 3.0 \n", - " 1000 1 False 31.0 \n", - " True 31.0 \n", - " 100 False 31.0 \n", - " True 31.0 \n", - " 100000 1 False 2876.0 \n", - " True 2876.0 \n", - " 100 False 2876.0 \n", - " True 2876.0 \n", - "100000 100 1 False 78.0 \n", - " True 78.0 \n", - " 100 False 78.0 \n", - " True 78.0 \n", - " 1000 1 False 828.0 \n", - " True 828.0 \n", - " 100 False 828.0 \n", - " True 828.0 \n", - " 100000 1 False 84469.0 \n", - " True 84469.0 \n", - " 100 False 84469.0 \n", - " True 84469.0 \n", + " 1000 False 3.0 \n", + " True 3.0 \n", + " 1000 1 False 26.0 \n", + " True 26.0 \n", + " 10 False 26.0 \n", + " True 26.0 \n", + " 100 False 26.0 \n", + " True 26.0 \n", + " 1000 False 26.0 \n", + " True 26.0 \n", + " 100000 1 False 2914.0 \n", + " True 2914.0 \n", + " 10 False 2914.0 \n", + " True 2914.0 \n", + " 100 False 2914.0 \n", + " True 2914.0 \n", + " 1000 False 2914.0 \n", + " True 2914.0 \n", + "100000 100 1 False 423.0 \n", + " True 423.0 \n", + " 10 False 423.0 \n", + " True 423.0 \n", + " 100 False 423.0 \n", + " True 423.0 \n", + " 1000 False 423.0 \n", + " True 423.0 \n", + " 1000 1 False 5162.0 \n", + " True 5162.0 \n", + " 10 False 5162.0 \n", + " True 5162.0 \n", + " 100 False 5162.0 \n", + " True 5162.0 \n", + " 1000 False 5162.0 \n", + " True 5162.0 \n", + " 100000 1 False 525310.0 \n", + " True 525310.0 \n", + " 10 False 525310.0 \n", + " True 525310.0 \n", + " 100 False 525310.0 \n", + " True 525310.0 \n", + " 1000 False 525310.0 \n", + " True 525310.0 \n", "\n", " matrix_nonzero \\\n", "dictionary_size corpus_size nonzero_limit normalized \n", 
"1000 100 1 False 1000.0 \n", " True 1000.0 \n", - " 100 False 83350.0 \n", - " True 83350.0 \n", + " 10 False 8634.0 \n", + " True 8634.0 \n", + " 100 False 84944.0 \n", + " True 84944.0 \n", + " 1000 False 838588.0 \n", + " True 838588.0 \n", " 1000 1 False 1000.0 \n", " True 1000.0 \n", - " 100 False 83350.0 \n", - " True 83350.0 \n", + " 10 False 8634.0 \n", + " True 8634.0 \n", + " 100 False 84944.0 \n", + " True 84944.0 \n", + " 1000 False 838588.0 \n", + " True 838588.0 \n", " 100000 1 False 1000.0 \n", " True 1000.0 \n", - " 100 False 83350.0 \n", - " True 83350.0 \n", - "100000 100 1 False 101704.0 \n", - " True 101704.0 \n", - " 100 False 8092140.0 \n", - " True 8092140.0 \n", - " 1000 1 False 101704.0 \n", - " True 101704.0 \n", - " 100 False 8092140.0 \n", - " True 8092140.0 \n", - " 100000 1 False 101704.0 \n", - " True 101704.0 \n", - " 100 False 8092140.0 \n", - " True 8092140.0 \n", + " 10 False 8634.0 \n", + " True 8634.0 \n", + " 100 False 84944.0 \n", + " True 84944.0 \n", + " 1000 False 838588.0 \n", + " True 838588.0 \n", + "100000 100 1 False 101868.0 \n", + " True 101868.0 \n", + " 10 False 814154.0 \n", + " True 814154.0 \n", + " 100 False 8202884.0 \n", + " True 8202884.0 \n", + " 1000 False 89912542.0 \n", + " True 89912542.0 \n", + " 1000 1 False 101868.0 \n", + " True 101868.0 \n", + " 10 False 814154.0 \n", + " True 814154.0 \n", + " 100 False 8202884.0 \n", + " True 8202884.0 \n", + " 1000 False 89912542.0 \n", + " True 89912542.0 \n", + " 100000 1 False 101868.0 \n", + " True 101868.0 \n", + " 10 False 814154.0 \n", + " True 814154.0 \n", + " 100 False 8202884.0 \n", + " True 8202884.0 \n", + " 1000 False 89912542.0 \n", + " True 89912542.0 \n", "\n", " speed \n", "dictionary_size corpus_size nonzero_limit normalized \n", - "1000 100 1 False 6.63 Kdoc pairs / s \n", - " True 2.05 Kdoc pairs / s \n", - " 100 False 3.31 Kdoc pairs / s \n", - " True 1.31 Kdoc pairs / s \n", - " 1000 1 False 749.77 Kdoc pairs / s \n", - " True 235.11 Kdoc pairs / s \n", - " 100 False 354.93 Kdoc pairs / s \n", - " True 139.65 Kdoc pairs / s \n", - " 100000 1 False 398435.09 Kdoc pairs / s \n", - " True 315390.91 Kdoc pairs / s \n", - " 100 False 249153.21 Kdoc pairs / s \n", - " True 175611.68 Kdoc pairs / s \n", - "100000 100 1 False 2159.90 Kdoc pairs / s \n", - " True 818.63 Kdoc pairs / s \n", - " 100 False 16.83 Kdoc pairs / s \n", - " True 5.78 Kdoc pairs / s \n", - " 1000 1 False 67407.45 Kdoc pairs / s \n", - " True 44103.12 Kdoc pairs / s \n", - " 100 False 1905.11 Kdoc pairs / s \n", - " True 618.77 Kdoc pairs / s \n", - " 100000 1 False 214349.36 Kdoc pairs / s \n", - " True 197587.81 Kdoc pairs / s \n", - " 100 False 154975.96 Kdoc pairs / s \n", - " True 136928.74 Kdoc pairs / s " + "1000 100 1 False 6.69 Kdoc pairs / s \n", + " True 1.70 Kdoc pairs / s \n", + " 10 False 5.80 Kdoc pairs / s \n", + " True 1.70 Kdoc pairs / s \n", + " 100 False 3.05 Kdoc pairs / s \n", + " True 1.07 Kdoc pairs / s \n", + " 1000 False 0.42 Kdoc pairs / s \n", + " True 0.16 Kdoc pairs / s \n", + " 1000 1 False 418.61 Kdoc pairs / s \n", + " True 114.78 Kdoc pairs / s \n", + " 10 False 381.50 Kdoc pairs / s \n", + " True 110.60 Kdoc pairs / s \n", + " 100 False 164.03 Kdoc pairs / s \n", + " True 67.81 Kdoc pairs / s \n", + " 1000 False 26.73 Kdoc pairs / s \n", + " True 10.59 Kdoc pairs / s \n", + " 100000 1 False 391443.20 Kdoc pairs / s \n", + " True 298377.75 Kdoc pairs / s \n", + " 10 False 386722.55 Kdoc pairs / s \n", + " True 288567.14 Kdoc pairs / s \n", + " 100 False 274905.36 
Kdoc pairs / s \n", + " True 189709.57 Kdoc pairs / s \n", + " 1000 False 86598.15 Kdoc pairs / s \n", + " True 41971.58 Kdoc pairs / s \n", + "100000 100 1 False 2013.92 Kdoc pairs / s \n", + " True 760.13 Kdoc pairs / s \n", + " 10 False 206.66 Kdoc pairs / s \n", + " True 83.46 Kdoc pairs / s \n", + " 100 False 13.77 Kdoc pairs / s \n", + " True 4.62 Kdoc pairs / s \n", + " 1000 False 1.35 Kdoc pairs / s \n", + " True 0.45 Kdoc pairs / s \n", + " 1000 1 False 67139.73 Kdoc pairs / s \n", + " True 40798.02 Kdoc pairs / s \n", + " 10 False 16444.18 Kdoc pairs / s \n", + " True 7425.08 Kdoc pairs / s \n", + " 100 False 1338.94 Kdoc pairs / s \n", + " True 454.49 Kdoc pairs / s \n", + " 1000 False 133.44 Kdoc pairs / s \n", + " True 44.42 Kdoc pairs / s \n", + " 100000 1 False 192578.80 Kdoc pairs / s \n", + " True 156431.36 Kdoc pairs / s \n", + " 10 False 159029.88 Kdoc pairs / s \n", + " True 131871.88 Kdoc pairs / s \n", + " 100 False 94216.21 Kdoc pairs / s \n", + " True 80177.08 Kdoc pairs / s \n", + " 1000 False 36286.25 Kdoc pairs / s \n", + " True 29097.14 Kdoc pairs / s " ] }, - "execution_count": 40, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "display(df.mean()).loc[\n", - " [1000, 100000], :, [1, 100], :].loc[\n", + " [1000, 100000], :, [1, 10, 100, 1000], :].loc[\n", " :, [\"duration\", \"corpus_nonzero\", \"matrix_nonzero\", \"speed\"]]" ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 42, "metadata": {}, "outputs": [ { @@ -3677,188 +4272,188 @@ " 100\n", " 1\n", " False\n", - " 00:00:00.000271\n", + " 00:00:00.000292\n", " 0.0\n", " 0.0\n", - " 1.64 Kdoc pairs / s\n", + " 1.48 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.000283\n", + " 00:00:00.000225\n", " 0.0\n", " 0.0\n", - " 0.15 Kdoc pairs / s\n", + " 0.08 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:00.000165\n", + " 00:00:00.000747\n", " 0.0\n", " 0.0\n", - " 0.21 Kdoc pairs / s\n", + " 1.02 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.000552\n", + " 00:00:00.000488\n", " 0.0\n", " 0.0\n", - " 0.12 Kdoc pairs / s\n", + " 0.07 Kdoc pairs / s\n", " \n", " \n", " 1000\n", " 1\n", " False\n", - " 00:00:00.000053\n", + " 00:00:00.000027\n", " 0.0\n", " 0.0\n", - " 32.26 Kdoc pairs / s\n", + " 8.10 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.000068\n", + " 00:00:00.000069\n", " 0.0\n", " 0.0\n", - " 3.91 Kdoc pairs / s\n", + " 1.56 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:00.000164\n", + " 00:00:00.000309\n", " 0.0\n", " 0.0\n", - " 20.53 Kdoc pairs / s\n", + " 16.26 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.000147\n", + " 00:00:00.000268\n", " 0.0\n", " 0.0\n", - " 2.96 Kdoc pairs / s\n", + " 2.24 Kdoc pairs / s\n", " \n", " \n", " 100000\n", " 1\n", " False\n", - " 00:00:00.000904\n", + " 00:00:00.000576\n", " 0.0\n", " 0.0\n", - " 16848.75 Kdoc pairs / s\n", + " 11256.03 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.000698\n", + " 00:00:00.000574\n", " 0.0\n", " 0.0\n", - " 8532.12 Kdoc pairs / s\n", + " 6512.19 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:00.002211\n", + " 00:00:00.000562\n", " 0.0\n", " 0.0\n", - " 15186.08 Kdoc pairs / s\n", + " 5233.50 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.004056\n", + " 00:00:00.000609\n", " 0.0\n", " 0.0\n", - " 14334.32 Kdoc pairs / s\n", + " 2743.63 Kdoc pairs / s\n", " \n", " \n", " 100000\n", " 100\n", " 1\n", " False\n", - " 00:00:00.000285\n", + " 
00:00:00.000152\n", " 0.0\n", " 0.0\n", - " 219.71 Kdoc pairs / s\n", + " 98.97 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.000351\n", + " 00:00:00.000322\n", " 0.0\n", " 0.0\n", - " 40.01 Kdoc pairs / s\n", + " 28.10 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:00.079347\n", + " 00:00:00.004997\n", " 0.0\n", " 0.0\n", - " 2.36 Kdoc pairs / s\n", + " 0.14 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.049617\n", + " 00:00:00.022206\n", " 0.0\n", " 0.0\n", - " 0.26 Kdoc pairs / s\n", + " 0.07 Kdoc pairs / s\n", " \n", " \n", " 1000\n", " 1\n", " False\n", - " 00:00:00.000098\n", + " 00:00:00.000210\n", " 0.0\n", " 0.0\n", - " 655.07 Kdoc pairs / s\n", + " 1420.00 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.000237\n", + " 00:00:00.000192\n", " 0.0\n", " 0.0\n", - " 667.64 Kdoc pairs / s\n", + " 467.23 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:00.009330\n", + " 00:00:00.019022\n", " 0.0\n", " 0.0\n", - " 49.51 Kdoc pairs / s\n", + " 45.91 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.094245\n", + " 00:00:00.004431\n", " 0.0\n", " 0.0\n", - " 44.94 Kdoc pairs / s\n", + " 1.35 Kdoc pairs / s\n", " \n", " \n", " 100000\n", " 1\n", " False\n", - " 00:00:00.299012\n", + " 00:00:00.024466\n", " 0.0\n", " 0.0\n", - " 1884.45 Kdoc pairs / s\n", + " 126.77 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.060703\n", + " 00:00:00.062447\n", " 0.0\n", " 0.0\n", - " 331.59 Kdoc pairs / s\n", + " 213.64 Kdoc pairs / s\n", " \n", " \n", " 100\n", " False\n", - " 00:00:00.316624\n", + " 00:00:00.087692\n", " 0.0\n", " 0.0\n", - " 1065.48 Kdoc pairs / s\n", + " 108.55 Kdoc pairs / s\n", " \n", " \n", " True\n", - " 00:00:00.533292\n", + " 00:00:01.065889\n", " 0.0\n", " 0.0\n", - " 1371.92 Kdoc pairs / s\n", + " 968.80 Kdoc pairs / s\n", " \n", " \n", "\n", @@ -3867,30 +4462,30 @@ "text/plain": [ " duration \\\n", "dictionary_size corpus_size nonzero_limit normalized \n", - "1000 100 1 False 00:00:00.000271 \n", - " True 00:00:00.000283 \n", - " 100 False 00:00:00.000165 \n", - " True 00:00:00.000552 \n", - " 1000 1 False 00:00:00.000053 \n", - " True 00:00:00.000068 \n", - " 100 False 00:00:00.000164 \n", - " True 00:00:00.000147 \n", - " 100000 1 False 00:00:00.000904 \n", - " True 00:00:00.000698 \n", - " 100 False 00:00:00.002211 \n", - " True 00:00:00.004056 \n", - "100000 100 1 False 00:00:00.000285 \n", - " True 00:00:00.000351 \n", - " 100 False 00:00:00.079347 \n", - " True 00:00:00.049617 \n", - " 1000 1 False 00:00:00.000098 \n", - " True 00:00:00.000237 \n", - " 100 False 00:00:00.009330 \n", - " True 00:00:00.094245 \n", - " 100000 1 False 00:00:00.299012 \n", - " True 00:00:00.060703 \n", - " 100 False 00:00:00.316624 \n", - " True 00:00:00.533292 \n", + "1000 100 1 False 00:00:00.000292 \n", + " True 00:00:00.000225 \n", + " 100 False 00:00:00.000747 \n", + " True 00:00:00.000488 \n", + " 1000 1 False 00:00:00.000027 \n", + " True 00:00:00.000069 \n", + " 100 False 00:00:00.000309 \n", + " True 00:00:00.000268 \n", + " 100000 1 False 00:00:00.000576 \n", + " True 00:00:00.000574 \n", + " 100 False 00:00:00.000562 \n", + " True 00:00:00.000609 \n", + "100000 100 1 False 00:00:00.000152 \n", + " True 00:00:00.000322 \n", + " 100 False 00:00:00.004997 \n", + " True 00:00:00.022206 \n", + " 1000 1 False 00:00:00.000210 \n", + " True 00:00:00.000192 \n", + " 100 False 00:00:00.019022 \n", + " True 00:00:00.004431 \n", + " 100000 1 False 00:00:00.024466 \n", + " True 00:00:00.062447 \n", + " 100 False 
00:00:00.087692 \n", + " True 00:00:01.065889 \n", "\n", " corpus_nonzero \\\n", "dictionary_size corpus_size nonzero_limit normalized \n", @@ -3948,33 +4543,33 @@ "\n", " speed \n", "dictionary_size corpus_size nonzero_limit normalized \n", - "1000 100 1 False 1.64 Kdoc pairs / s \n", - " True 0.15 Kdoc pairs / s \n", - " 100 False 0.21 Kdoc pairs / s \n", - " True 0.12 Kdoc pairs / s \n", - " 1000 1 False 32.26 Kdoc pairs / s \n", - " True 3.91 Kdoc pairs / s \n", - " 100 False 20.53 Kdoc pairs / s \n", - " True 2.96 Kdoc pairs / s \n", - " 100000 1 False 16848.75 Kdoc pairs / s \n", - " True 8532.12 Kdoc pairs / s \n", - " 100 False 15186.08 Kdoc pairs / s \n", - " True 14334.32 Kdoc pairs / s \n", - "100000 100 1 False 219.71 Kdoc pairs / s \n", - " True 40.01 Kdoc pairs / s \n", - " 100 False 2.36 Kdoc pairs / s \n", - " True 0.26 Kdoc pairs / s \n", - " 1000 1 False 655.07 Kdoc pairs / s \n", - " True 667.64 Kdoc pairs / s \n", - " 100 False 49.51 Kdoc pairs / s \n", - " True 44.94 Kdoc pairs / s \n", - " 100000 1 False 1884.45 Kdoc pairs / s \n", - " True 331.59 Kdoc pairs / s \n", - " 100 False 1065.48 Kdoc pairs / s \n", - " True 1371.92 Kdoc pairs / s " + "1000 100 1 False 1.48 Kdoc pairs / s \n", + " True 0.08 Kdoc pairs / s \n", + " 100 False 1.02 Kdoc pairs / s \n", + " True 0.07 Kdoc pairs / s \n", + " 1000 1 False 8.10 Kdoc pairs / s \n", + " True 1.56 Kdoc pairs / s \n", + " 100 False 16.26 Kdoc pairs / s \n", + " True 2.24 Kdoc pairs / s \n", + " 100000 1 False 11256.03 Kdoc pairs / s \n", + " True 6512.19 Kdoc pairs / s \n", + " 100 False 5233.50 Kdoc pairs / s \n", + " True 2743.63 Kdoc pairs / s \n", + "100000 100 1 False 98.97 Kdoc pairs / s \n", + " True 28.10 Kdoc pairs / s \n", + " 100 False 0.14 Kdoc pairs / s \n", + " True 0.07 Kdoc pairs / s \n", + " 1000 1 False 1420.00 Kdoc pairs / s \n", + " True 467.23 Kdoc pairs / s \n", + " 100 False 45.91 Kdoc pairs / s \n", + " True 1.35 Kdoc pairs / s \n", + " 100000 1 False 126.77 Kdoc pairs / s \n", + " True 213.64 Kdoc pairs / s \n", + " 100 False 108.55 Kdoc pairs / s \n", + " True 968.80 Kdoc pairs / s " ] }, - "execution_count": 41, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } diff --git a/docs/notebooks/soft_cosine_tutorial.ipynb b/docs/notebooks/soft_cosine_tutorial.ipynb index 47ffa167d1..96918bec69 100644 --- a/docs/notebooks/soft_cosine_tutorial.ipynb +++ b/docs/notebooks/soft_cosine_tutorial.ipynb @@ -84,6 +84,16 @@ "scrolled": true }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/mnt/storage/home/novotny/.virtualenvs/gensim/lib/python3.4/importlib/_bootstrap.py:321: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n", + " return f(*args, **kwds)\n", + "/mnt/storage/home/novotny/.virtualenvs/gensim/lib/python3.4/importlib/_bootstrap.py:321: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n", + " return f(*args, **kwds)\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -97,9 +107,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "2018-07-13 01:15:08,869 : INFO : 'pattern' package not found; tag filters are not available for English\n", - "2018-07-13 01:15:08,872 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", - "2018-07-13 01:15:08,873 : INFO : built Dictionary(14 unique tokens: ['orange', 'chicago', 'president', 'press', 'time']...) 
from 3 documents (total 15 corpus positions)\n" + "2018-09-11 22:02:01,041 : INFO : 'pattern' package not found; tag filters are not available for English\n", + "2018-09-11 22:02:01,044 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:02:01,045 : INFO : built Dictionary(14 unique tokens: ['speaks', 'illinois', 'greets', 'juice', 'chicago']...) from 3 documents (total 15 corpus positions)\n" ] } ], @@ -142,23 +152,23 @@ "name": "stderr", "output_type": "stream", "text": [ - "2018-07-13 01:15:09,066 : INFO : loading projection weights from /home/novotny/gensim-data/glove-wiki-gigaword-50/glove-wiki-gigaword-50.gz\n", - "2018-07-13 01:15:37,302 : INFO : loaded (400000, 50) matrix from /home/novotny/gensim-data/glove-wiki-gigaword-50/glove-wiki-gigaword-50.gz\n", - "2018-07-13 01:15:37,303 : INFO : constructing a sparse term similarity matrix using \n", - "2018-07-13 01:15:37,304 : INFO : iterating over columns in dictionary order\n", - "2018-07-13 01:15:37,305 : INFO : PROGRESS: at 7.14% columns (1 / 14, 7.142857% density, 7.142857% projected density)\n", - "2018-07-13 01:15:37,306 : INFO : precomputing L2-norms of word weight vectors\n", - "/mnt/storage/home/novotny/.virtualenvs/gensim/lib/python3.4/site-packages/gensim-3.4.0-py3.4-linux-x86_64.egg/gensim/matutils.py:738: FutureWarning: Conversion of the second argument of issubdtype from `int` to `np.signedinteger` is deprecated. In future, it will be treated as `np.int64 == np.dtype(int).type`.\n", + "2018-09-11 22:02:01,236 : INFO : loading projection weights from /home/novotny/gensim-data/glove-wiki-gigaword-50/glove-wiki-gigaword-50.gz\n", + "2018-09-11 22:02:26,984 : INFO : loaded (400000, 50) matrix from /home/novotny/gensim-data/glove-wiki-gigaword-50/glove-wiki-gigaword-50.gz\n", + "2018-09-11 22:02:26,985 : INFO : constructing a sparse term similarity matrix using \n", + "2018-09-11 22:02:26,986 : INFO : iterating over columns in dictionary order\n", + "2018-09-11 22:02:26,987 : INFO : PROGRESS: at 7.14% columns (1 / 14, 7.142857% density, 7.142857% projected density)\n", + "2018-09-11 22:02:26,988 : INFO : precomputing L2-norms of word weight vectors\n", + "/mnt/storage/home/novotny/.virtualenvs/gensim/lib/python3.4/site-packages/gensim/matutils.py:738: FutureWarning: Conversion of the second argument of issubdtype from `int` to `np.signedinteger` is deprecated. 
In future, it will be treated as `np.int64 == np.dtype(int).type`.\n", " if np.issubdtype(vec.dtype, np.int):\n", - "2018-07-13 01:15:37,598 : INFO : constructed a sparse term similarity matrix with 11.224490% density\n" + "2018-09-11 22:02:27,273 : INFO : constructed a sparse term similarity matrix with 11.224490% density\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 29.7 s, sys: 2.95 s, total: 32.7 s\n", - "Wall time: 28.7 s\n" + "CPU times: user 27.8 s, sys: 2.43 s, total: 30.3 s\n", + "Wall time: 26.2 s\n" ] } ], @@ -251,8 +261,8 @@ "[nltk_data] /home/novotny/nltk_data...\n", "[nltk_data] Package stopwords is already up-to-date!\n", "Number of documents: 3\n", - "CPU times: user 2min 38s, sys: 1.71 s, total: 2min 40s\n", - "Wall time: 2min 40s\n" + "CPU times: user 2min 37s, sys: 1.62 s, total: 2min 39s\n", + "Wall time: 2min 39s\n" ] } ], @@ -310,65 +320,1385 @@ "name": "stderr", "output_type": "stream", "text": [ - "2018-07-13 01:18:18,526 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", - "2018-07-13 01:19:19,198 : INFO : adding document #2270000 to Dictionary(462407 unique tokens: ['assymmetric', 'oronamin', 'govn', 'بلد', 'moussaoui']...)\n", - "2018-07-13 01:19:19,310 : INFO : built Dictionary(462807 unique tokens: ['assymmetric', 'oronamin', 'govn', 'بلد', 'moussaoui']...) from 2274338 documents (total 40096354 corpus positions)\n", - "2018-07-13 01:19:20,952 : INFO : collecting all words and their counts\n", - "2018-07-13 01:19:20,953 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types\n", - "2018-07-13 01:19:29,140 : INFO : PROGRESS: at sentence #2270000, processed 40024958 words, keeping 462407 word types\n", - "2018-07-13 01:19:29,155 : INFO : collected 462807 word types from a corpus of 40096354 raw words and 2274338 sentences\n", - "2018-07-13 01:19:29,156 : INFO : Loading a fresh vocabulary\n", - "2018-07-13 01:19:29,589 : INFO : effective_min_count=5 retains 104360 unique words (22% of original 462807, drops 358447)\n", - "2018-07-13 01:19:29,590 : INFO : effective_min_count=5 leaves 39565168 word corpus (98% of original 40096354, drops 531186)\n", - "2018-07-13 01:19:30,065 : INFO : deleting the raw counts dictionary of 462807 items\n", - "2018-07-13 01:19:30,085 : INFO : sample=0.001 downsamples 22 most-common words\n", - "2018-07-13 01:19:30,086 : INFO : downsampling leaves estimated 38552993 word corpus (97.4% of prior 39565168)\n", - "2018-07-13 01:19:30,529 : INFO : estimated required memory for 104360 words and 300 dimensions: 302644000 bytes\n", - "2018-07-13 01:19:30,530 : INFO : resetting layer weights\n", - "2018-07-13 01:19:32,506 : INFO : training model with 32 workers on 104360 vocabulary and 300 features, using sg=0 hs=0 sample=0.001 negative=5 window=5\n", - "2018-07-13 01:19:33,526 : INFO : EPOCH 1 - PROGRESS: at 2.52% examples, 960792 words/s, in_qsize 64, out_qsize 0\n", - "2018-07-13 01:20:03,847 : INFO : EPOCH 1 - PROGRESS: at 97.69% examples, 1201032 words/s, in_qsize 62, out_qsize 1\n", - "2018-07-13 01:20:04,329 : INFO : worker thread finished; awaiting finish of 31 more threads\n", - "2018-07-13 01:20:04,460 : INFO : worker thread finished; awaiting finish of 0 more threads\n", - "2018-07-13 01:20:04,460 : INFO : EPOCH - 1 : training on 40096354 raw words (38515125 effective words) took 31.9s, 1205790 effective words/s\n", - "2018-07-13 01:20:05,515 : INFO : EPOCH 2 - PROGRESS: at 3.01% examples, 1110081 words/s, in_qsize 63, out_qsize 0\n", - "2018-07-13 01:20:35,787 : 
INFO : EPOCH 2 - PROGRESS: at 97.65% examples, 1201557 words/s, in_qsize 64, out_qsize 0\n", - "2018-07-13 01:20:36,224 : INFO : worker thread finished; awaiting finish of 31 more threads\n", - "2018-07-13 01:20:36,422 : INFO : worker thread finished; awaiting finish of 0 more threads\n", - "2018-07-13 01:20:36,423 : INFO : EPOCH - 2 : training on 40096354 raw words (38515218 effective words) took 31.9s, 1205725 effective words/s\n", - "2018-07-13 01:20:37,447 : INFO : EPOCH 3 - PROGRESS: at 2.76% examples, 1048462 words/s, in_qsize 63, out_qsize 0\n", - "2018-07-13 01:21:07,768 : INFO : EPOCH 3 - PROGRESS: at 98.86% examples, 1215807 words/s, in_qsize 45, out_qsize 0\n", - "2018-07-13 01:21:07,854 : INFO : worker thread finished; awaiting finish of 31 more threads\n", - "2018-07-13 01:21:08,033 : INFO : worker thread finished; awaiting finish of 0 more threads\n", - "2018-07-13 01:21:08,034 : INFO : EPOCH - 3 : training on 40096354 raw words (38515910 effective words) took 31.6s, 1219133 effective words/s\n", - "2018-07-13 01:21:09,046 : INFO : EPOCH 4 - PROGRESS: at 2.61% examples, 995957 words/s, in_qsize 61, out_qsize 2\n", - "2018-07-13 01:21:39,376 : INFO : EPOCH 4 - PROGRESS: at 98.42% examples, 1210180 words/s, in_qsize 63, out_qsize 0\n", - "2018-07-13 01:21:39,641 : INFO : worker thread finished; awaiting finish of 31 more threads\n", - "2018-07-13 01:21:39,810 : INFO : worker thread finished; awaiting finish of 0 more threads\n", - "2018-07-13 01:21:39,811 : INFO : EPOCH - 4 : training on 40096354 raw words (38516888 effective words) took 31.8s, 1212546 effective words/s\n", - "2018-07-13 01:21:40,826 : INFO : EPOCH 5 - PROGRESS: at 2.45% examples, 936123 words/s, in_qsize 64, out_qsize 0\n", - "2018-07-13 01:22:11,124 : INFO : EPOCH 5 - PROGRESS: at 97.55% examples, 1200270 words/s, in_qsize 60, out_qsize 3\n", - "2018-07-13 01:22:11,661 : INFO : worker thread finished; awaiting finish of 31 more threads\n", - "2018-07-13 01:22:11,814 : INFO : worker thread finished; awaiting finish of 0 more threads\n", - "2018-07-13 01:22:11,814 : INFO : EPOCH - 5 : training on 40096354 raw words (38514994 effective words) took 32.0s, 1203907 effective words/s\n", - "2018-07-13 01:22:11,815 : INFO : training on a 200481770 raw words (192578135 effective words) took 159.3s, 1208836 effective words/s\n", - "2018-07-13 01:22:12,203 : INFO : constructing a sparse term similarity matrix using \n", - "2018-07-13 01:22:12,222 : INFO : iterating over columns in tf-idf order\n", - "2018-07-13 01:22:13,381 : INFO : PROGRESS: at 0.00% columns (1 / 462807, 0.000216% density, 0.000216% projected density)\n", - "2018-07-13 01:22:13,715 : INFO : precomputing L2-norms of word weight vectors\n", - "/mnt/storage/home/novotny/.virtualenvs/gensim/lib/python3.4/site-packages/gensim-3.4.0-py3.4-linux-x86_64.egg/gensim/matutils.py:738: FutureWarning: Conversion of the second argument of issubdtype from `int` to `np.signedinteger` is deprecated. 
In future, it will be treated as `np.int64 == np.dtype(int).type`.\n", + "2018-09-11 22:05:07,212 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:05:07,485 : INFO : adding document #10000 to Dictionary(20088 unique tokens: ['authours', 'chimney', 'bombard', 'euro', 'lies']...)\n", + "2018-09-11 22:05:07,751 : INFO : adding document #20000 to Dictionary(29692 unique tokens: ['biscit', 'pharaonic', 'authours', 'chimney', 'unanimous']...)\n", + "2018-09-11 22:05:08,036 : INFO : adding document #30000 to Dictionary(37971 unique tokens: ['biscit', 'pharaonic', 'authours', 'chimney', 'unanimous']...)\n", + "2018-09-11 22:05:08,293 : INFO : adding document #40000 to Dictionary(43930 unique tokens: ['biscit', 'chimney', 'strangers', 'untruths', 'apes']...)\n", + "2018-09-11 22:05:08,550 : INFO : adding document #50000 to Dictionary(49340 unique tokens: ['biscit', 'chimney', 'strangers', 'zimbabawe', 'untruths']...)\n", + "2018-09-11 22:05:08,825 : INFO : adding document #60000 to Dictionary(54734 unique tokens: ['biscit', 'chimney', 'strangers', 'zimbabawe', 'smartness']...)\n", + "2018-09-11 22:05:09,096 : INFO : adding document #70000 to Dictionary(59734 unique tokens: ['biscit', 'chimney', 'strangers', 'zimbabawe', 'smartness']...)\n", + "2018-09-11 22:05:09,380 : INFO : adding document #80000 to Dictionary(64698 unique tokens: ['biscit', 'chimney', 'strangers', 'zimbabawe', 'smartness']...)\n", + "2018-09-11 22:05:09,647 : INFO : adding document #90000 to Dictionary(68921 unique tokens: ['biscit', 'chimney', 'strangers', 'zimbabawe', 'smartness']...)\n", + "2018-09-11 22:05:09,923 : INFO : adding document #100000 to Dictionary(74025 unique tokens: ['biscit', 'chimney', 'strangers', 'zimbabawe', 'smartness']...)\n", + "2018-09-11 22:05:10,198 : INFO : adding document #110000 to Dictionary(78063 unique tokens: ['biscit', 'chimney', 'deletd', 'strangers', 'zimbabawe']...)\n", + "2018-09-11 22:05:10,477 : INFO : adding document #120000 to Dictionary(81932 unique tokens: ['biscit', 'chimney', 'palestin', 'deletd', 'strangers']...)\n", + "2018-09-11 22:05:10,739 : INFO : adding document #130000 to Dictionary(85850 unique tokens: ['biscit', 'chimney', 'palestin', 'nisaan', 'deletd']...)\n", + "2018-09-11 22:05:11,023 : INFO : adding document #140000 to Dictionary(89489 unique tokens: ['chimney', 'deletd', 'strangers', 'smartness', 'apes']...)\n", + "2018-09-11 22:05:11,292 : INFO : adding document #150000 to Dictionary(93441 unique tokens: ['chimney', 'deletd', 'strangers', 'kolayaalee', 'smartness']...)\n", + "2018-09-11 22:05:11,560 : INFO : adding document #160000 to Dictionary(97166 unique tokens: ['chimney', 'adib', 'deletd', 'strangers', 'kolayaalee']...)\n", + "2018-09-11 22:05:11,827 : INFO : adding document #170000 to Dictionary(100281 unique tokens: ['chimney', 'adib', 'deletd', 'strangers', 'kolayaalee']...)\n", + "2018-09-11 22:05:12,087 : INFO : adding document #180000 to Dictionary(103372 unique tokens: ['chimney', 'adib', 'deletd', 'strangers', 'kolayaalee']...)\n", + "2018-09-11 22:05:12,366 : INFO : adding document #190000 to Dictionary(106627 unique tokens: ['chimney', 'adib', 'deletd', 'strangers', 'kolayaalee']...)\n", + "2018-09-11 22:05:12,649 : INFO : adding document #200000 to Dictionary(110902 unique tokens: ['chimney', 'adib', 'deletd', 'strangers', 'kolayaalee']...)\n", + "2018-09-11 22:05:12,924 : INFO : adding document #210000 to Dictionary(113686 unique tokens: ['chimney', 'adib', 'deletd', 'strangers', 'kolayaalee']...)\n", + 
"2018-09-11 22:05:13,204 : INFO : adding document #220000 to Dictionary(117110 unique tokens: ['chimney', 'adib', 'deletd', 'strangers', 'kolayaalee']...)\n", + "2018-09-11 22:05:13,471 : INFO : adding document #230000 to Dictionary(119961 unique tokens: ['chimney', 'adib', 'deletd', 'strangers', 'kolayaalee']...)\n", + "2018-09-11 22:05:13,754 : INFO : adding document #240000 to Dictionary(123182 unique tokens: ['مصرية', 'chimney', 'adib', 'deletd', 'strangers']...)\n", + "2018-09-11 22:05:14,023 : INFO : adding document #250000 to Dictionary(125952 unique tokens: ['مصرية', 'chimney', 'adib', 'deletd', 'strangers']...)\n", + "2018-09-11 22:05:14,306 : INFO : adding document #260000 to Dictionary(128806 unique tokens: ['مصرية', 'chimney', 'adib', 'deletd', 'strangers']...)\n", + "2018-09-11 22:05:14,575 : INFO : adding document #270000 to Dictionary(131361 unique tokens: ['مصرية', 'chimney', 'adib', 'deletd', 'strangers']...)\n", + "2018-09-11 22:05:14,848 : INFO : adding document #280000 to Dictionary(133942 unique tokens: ['مصرية', 'chimney', 'adib', 'deletd', 'strangers']...)\n", + "2018-09-11 22:05:15,116 : INFO : adding document #290000 to Dictionary(136306 unique tokens: ['مصرية', 'chimney', 'adib', 'deletd', 'strangers']...)\n", + "2018-09-11 22:05:15,390 : INFO : adding document #300000 to Dictionary(138957 unique tokens: ['مصرية', 'chimney', 'adib', 'deletd', 'strangers']...)\n", + "2018-09-11 22:05:15,676 : INFO : adding document #310000 to Dictionary(141490 unique tokens: ['مصرية', 'chimney', 'adib', 'deletd', 'strangers']...)\n", + "2018-09-11 22:05:15,953 : INFO : adding document #320000 to Dictionary(144071 unique tokens: ['مصرية', 'chimney', 'adib', 'deletd', 'strangers']...)\n", + "2018-09-11 22:05:16,224 : INFO : adding document #330000 to Dictionary(146510 unique tokens: ['مصرية', 'chimney', 'adib', 'deletd', 'strangers']...)\n", + "2018-09-11 22:05:16,488 : INFO : adding document #340000 to Dictionary(149053 unique tokens: ['مصرية', 'chimney', 'adib', 'deletd', 'strangers']...)\n", + "2018-09-11 22:05:16,776 : INFO : adding document #350000 to Dictionary(151463 unique tokens: ['مصرية', 'chimney', 'adib', 'deletd', 'strangers']...)\n", + "2018-09-11 22:05:17,052 : INFO : adding document #360000 to Dictionary(153612 unique tokens: ['مصرية', 'chimney', 'adib', 'deletd', 'strangers']...)\n", + "2018-09-11 22:05:17,323 : INFO : adding document #370000 to Dictionary(156234 unique tokens: ['مصرية', 'chimney', 'adib', 'deletd', 'strangers']...)\n", + "2018-09-11 22:05:17,620 : INFO : adding document #380000 to Dictionary(158845 unique tokens: ['مصرية', 'chimney', 'adib', 'deletd', 'strangers']...)\n", + "2018-09-11 22:05:17,882 : INFO : adding document #390000 to Dictionary(161029 unique tokens: ['مصرية', 'chimney', 'adib', 'deletd', 'strangers']...)\n", + "2018-09-11 22:05:18,136 : INFO : adding document #400000 to Dictionary(163444 unique tokens: ['مصرية', 'chimney', 'adib', 'deletd', 'strangers']...)\n", + "2018-09-11 22:05:18,401 : INFO : adding document #410000 to Dictionary(165551 unique tokens: ['مصرية', 'chimney', 'adib', 'deletd', 'strangers']...)\n", + "2018-09-11 22:05:18,666 : INFO : adding document #420000 to Dictionary(167864 unique tokens: ['مصرية', 'chimney', 'adib', 'deletd', 'strangers']...)\n", + "2018-09-11 22:05:18,931 : INFO : adding document #430000 to Dictionary(169982 unique tokens: ['مصرية', 'chimney', 'pples', 'adib', 'deletd']...)\n", + "2018-09-11 22:05:19,203 : INFO : adding document #440000 to Dictionary(172106 unique tokens: ['مصرية', 'chimney', 
'pples', 'adib', 'deletd']...)\n", + "2018-09-11 22:05:19,452 : INFO : adding document #450000 to Dictionary(174128 unique tokens: ['مصرية', 'chimney', 'pples', 'adib', 'deletd']...)\n", + "2018-09-11 22:05:19,722 : INFO : adding document #460000 to Dictionary(176267 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:19,985 : INFO : adding document #470000 to Dictionary(178429 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:20,249 : INFO : adding document #480000 to Dictionary(180738 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:20,513 : INFO : adding document #490000 to Dictionary(182982 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:20,780 : INFO : adding document #500000 to Dictionary(184754 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:21,048 : INFO : adding document #510000 to Dictionary(187327 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:21,317 : INFO : adding document #520000 to Dictionary(189327 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:21,574 : INFO : adding document #530000 to Dictionary(191219 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:21,837 : INFO : adding document #540000 to Dictionary(193182 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:05:22,111 : INFO : adding document #550000 to Dictionary(195951 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:22,389 : INFO : adding document #560000 to Dictionary(197956 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:22,669 : INFO : adding document #570000 to Dictionary(200145 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:22,926 : INFO : adding document #580000 to Dictionary(201859 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:23,178 : INFO : adding document #590000 to Dictionary(203724 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:23,438 : INFO : adding document #600000 to Dictionary(205607 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:23,703 : INFO : adding document #610000 to Dictionary(207387 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:23,967 : INFO : adding document #620000 to Dictionary(209246 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:24,221 : INFO : adding document #630000 to Dictionary(211094 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:24,477 : INFO : adding document #640000 to Dictionary(212963 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:24,733 : INFO : adding document #650000 to Dictionary(214666 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:25,002 : INFO : adding document #660000 to Dictionary(216409 unique tokens: ['bladder', 
'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:25,271 : INFO : adding document #670000 to Dictionary(218264 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:25,533 : INFO : adding document #680000 to Dictionary(220129 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:25,822 : INFO : adding document #690000 to Dictionary(222075 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:26,078 : INFO : adding document #700000 to Dictionary(223880 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:26,378 : INFO : adding document #710000 to Dictionary(225982 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:26,637 : INFO : adding document #720000 to Dictionary(227672 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:26,899 : INFO : adding document #730000 to Dictionary(229371 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:27,164 : INFO : adding document #740000 to Dictionary(231078 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:27,417 : INFO : adding document #750000 to Dictionary(232982 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:27,687 : INFO : adding document #760000 to Dictionary(234746 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:27,947 : INFO : adding document #770000 to Dictionary(236494 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:28,198 : INFO : adding document #780000 to Dictionary(238199 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:28,462 : INFO : adding document #790000 to Dictionary(240021 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:28,744 : INFO : adding document #800000 to Dictionary(242280 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:29,017 : INFO : adding document #810000 to Dictionary(244318 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:29,288 : INFO : adding document #820000 to Dictionary(246133 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:29,553 : INFO : adding document #830000 to Dictionary(247703 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:29,815 : INFO : adding document #840000 to Dictionary(249458 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:30,077 : INFO : adding document #850000 to Dictionary(251278 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:30,343 : INFO : adding document #860000 to Dictionary(252971 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:30,614 : INFO : adding document #870000 to Dictionary(254789 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:30,876 : INFO : adding document #880000 to Dictionary(256483 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + 
"2018-09-11 22:05:31,131 : INFO : adding document #890000 to Dictionary(258416 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:31,400 : INFO : adding document #900000 to Dictionary(260098 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:31,651 : INFO : adding document #910000 to Dictionary(261700 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:31,915 : INFO : adding document #920000 to Dictionary(263313 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:32,176 : INFO : adding document #930000 to Dictionary(264839 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:32,435 : INFO : adding document #940000 to Dictionary(266327 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:32,703 : INFO : adding document #950000 to Dictionary(267891 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:32,971 : INFO : adding document #960000 to Dictionary(270437 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:33,235 : INFO : adding document #970000 to Dictionary(272420 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:33,497 : INFO : adding document #980000 to Dictionary(274058 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:33,765 : INFO : adding document #990000 to Dictionary(275579 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:34,032 : INFO : adding document #1000000 to Dictionary(277402 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:34,300 : INFO : adding document #1010000 to Dictionary(279035 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:34,567 : INFO : adding document #1020000 to Dictionary(280584 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:34,831 : INFO : adding document #1030000 to Dictionary(282206 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:35,090 : INFO : adding document #1040000 to Dictionary(283570 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:35,367 : INFO : adding document #1050000 to Dictionary(285112 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:35,628 : INFO : adding document #1060000 to Dictionary(286666 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:35,895 : INFO : adding document #1070000 to Dictionary(288122 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:36,158 : INFO : adding document #1080000 to Dictionary(289489 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:05:36,423 : INFO : adding document #1090000 to Dictionary(291139 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:36,695 : INFO : adding document #1100000 to Dictionary(293838 unique tokens: ['bladder', 'appreciat', 'pples', 
'adib', 'strangers']...)\n", + "2018-09-11 22:05:36,955 : INFO : adding document #1110000 to Dictionary(295273 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:37,219 : INFO : adding document #1120000 to Dictionary(296816 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:37,478 : INFO : adding document #1130000 to Dictionary(298552 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:37,692 : INFO : adding document #1140000 to Dictionary(299628 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:37,957 : INFO : adding document #1150000 to Dictionary(301139 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:38,229 : INFO : adding document #1160000 to Dictionary(302566 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:38,495 : INFO : adding document #1170000 to Dictionary(304039 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:38,753 : INFO : adding document #1180000 to Dictionary(305503 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:39,015 : INFO : adding document #1190000 to Dictionary(307005 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:39,282 : INFO : adding document #1200000 to Dictionary(308842 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:39,559 : INFO : adding document #1210000 to Dictionary(310414 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:39,833 : INFO : adding document #1220000 to Dictionary(312012 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:40,097 : INFO : adding document #1230000 to Dictionary(313850 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:40,372 : INFO : adding document #1240000 to Dictionary(315829 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:40,634 : INFO : adding document #1250000 to Dictionary(317188 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'strangers']...)\n", + "2018-09-11 22:05:40,898 : INFO : adding document #1260000 to Dictionary(318577 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'twee']...)\n", + "2018-09-11 22:05:41,151 : INFO : adding document #1270000 to Dictionary(320245 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'twee']...)\n", + "2018-09-11 22:05:41,427 : INFO : adding document #1280000 to Dictionary(321715 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'twee']...)\n", + "2018-09-11 22:05:41,694 : INFO : adding document #1290000 to Dictionary(323216 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'twee']...)\n", + "2018-09-11 22:05:41,977 : INFO : adding document #1300000 to Dictionary(324767 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'twee']...)\n", + "2018-09-11 22:05:42,233 : INFO : adding document #1310000 to Dictionary(326386 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'twee']...)\n", + "2018-09-11 22:05:42,560 : INFO : adding document #1320000 to Dictionary(329383 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'twee']...)\n", + "2018-09-11 22:05:42,810 : INFO : 
adding document #1330000 to Dictionary(330810 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'twee']...)\n", + "2018-09-11 22:05:43,073 : INFO : adding document #1340000 to Dictionary(332299 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'twee']...)\n", + "2018-09-11 22:05:43,332 : INFO : adding document #1350000 to Dictionary(333664 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'twee']...)\n", + "2018-09-11 22:05:43,601 : INFO : adding document #1360000 to Dictionary(335153 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'twee']...)\n", + "2018-09-11 22:05:43,850 : INFO : adding document #1370000 to Dictionary(336962 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'twee']...)\n", + "2018-09-11 22:05:44,118 : INFO : adding document #1380000 to Dictionary(338540 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'twee']...)\n", + "2018-09-11 22:05:44,389 : INFO : adding document #1390000 to Dictionary(339974 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'twee']...)\n", + "2018-09-11 22:05:44,646 : INFO : adding document #1400000 to Dictionary(341332 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'twee']...)\n", + "2018-09-11 22:05:44,914 : INFO : adding document #1410000 to Dictionary(342864 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'twee']...)\n", + "2018-09-11 22:05:45,174 : INFO : adding document #1420000 to Dictionary(344362 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'twee']...)\n", + "2018-09-11 22:05:45,422 : INFO : adding document #1430000 to Dictionary(345627 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'twee']...)\n", + "2018-09-11 22:05:45,686 : INFO : adding document #1440000 to Dictionary(346909 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'twee']...)\n", + "2018-09-11 22:05:45,943 : INFO : adding document #1450000 to Dictionary(348275 unique tokens: ['bladder', 'appreciat', 'pples', 'adib', 'twee']...)\n", + "2018-09-11 22:05:46,237 : INFO : adding document #1460000 to Dictionary(349755 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:46,507 : INFO : adding document #1470000 to Dictionary(351025 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:46,774 : INFO : adding document #1480000 to Dictionary(352258 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:47,031 : INFO : adding document #1490000 to Dictionary(353503 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:47,289 : INFO : adding document #1500000 to Dictionary(354943 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:47,560 : INFO : adding document #1510000 to Dictionary(356295 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:47,812 : INFO : adding document #1520000 to Dictionary(357459 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:48,079 : INFO : adding document #1530000 to Dictionary(358666 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:48,346 : INFO : adding document #1540000 to Dictionary(359986 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:48,611 : INFO : adding document #1550000 to Dictionary(361326 unique tokens: ['pples', 
'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:48,894 : INFO : adding document #1560000 to Dictionary(362609 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:49,157 : INFO : adding document #1570000 to Dictionary(363808 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:49,433 : INFO : adding document #1580000 to Dictionary(365172 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:49,717 : INFO : adding document #1590000 to Dictionary(366433 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:49,987 : INFO : adding document #1600000 to Dictionary(367968 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:50,243 : INFO : adding document #1610000 to Dictionary(369421 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:50,582 : INFO : adding document #1620000 to Dictionary(371631 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:05:50,846 : INFO : adding document #1630000 to Dictionary(372956 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:51,107 : INFO : adding document #1640000 to Dictionary(374282 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:51,382 : INFO : adding document #1650000 to Dictionary(375746 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:51,646 : INFO : adding document #1660000 to Dictionary(377073 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:51,910 : INFO : adding document #1670000 to Dictionary(378393 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:52,170 : INFO : adding document #1680000 to Dictionary(379812 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:52,433 : INFO : adding document #1690000 to Dictionary(380895 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:52,708 : INFO : adding document #1700000 to Dictionary(384739 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:52,969 : INFO : adding document #1710000 to Dictionary(386066 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:53,238 : INFO : adding document #1720000 to Dictionary(387270 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:53,497 : INFO : adding document #1730000 to Dictionary(388385 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:53,756 : INFO : adding document #1740000 to Dictionary(389687 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:54,016 : INFO : adding document #1750000 to Dictionary(390955 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:54,276 : INFO : adding document #1760000 to Dictionary(392540 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + 
"2018-09-11 22:05:54,540 : INFO : adding document #1770000 to Dictionary(393838 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:54,800 : INFO : adding document #1780000 to Dictionary(395032 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:55,063 : INFO : adding document #1790000 to Dictionary(396178 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:55,336 : INFO : adding document #1800000 to Dictionary(401637 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:55,599 : INFO : adding document #1810000 to Dictionary(402961 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:55,866 : INFO : adding document #1820000 to Dictionary(404423 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:56,128 : INFO : adding document #1830000 to Dictionary(405685 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:56,386 : INFO : adding document #1840000 to Dictionary(406830 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:56,641 : INFO : adding document #1850000 to Dictionary(408042 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:56,896 : INFO : adding document #1860000 to Dictionary(409402 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:57,164 : INFO : adding document #1870000 to Dictionary(410413 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:57,420 : INFO : adding document #1880000 to Dictionary(411819 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:57,689 : INFO : adding document #1890000 to Dictionary(412945 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:57,958 : INFO : adding document #1900000 to Dictionary(414272 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:58,218 : INFO : adding document #1910000 to Dictionary(415361 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:58,489 : INFO : adding document #1920000 to Dictionary(416731 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:58,763 : INFO : adding document #1930000 to Dictionary(419310 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:59,031 : INFO : adding document #1940000 to Dictionary(421794 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:59,308 : INFO : adding document #1950000 to Dictionary(423125 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:59,574 : INFO : adding document #1960000 to Dictionary(424191 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:05:59,836 : INFO : adding document #1970000 to Dictionary(425372 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:00,108 : INFO : adding document #1980000 to Dictionary(426641 unique tokens: ['pples', 'adib', 'strangers', 
'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:00,371 : INFO : adding document #1990000 to Dictionary(427732 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:00,637 : INFO : adding document #2000000 to Dictionary(428904 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:00,901 : INFO : adding document #2010000 to Dictionary(429960 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:01,180 : INFO : adding document #2020000 to Dictionary(431271 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:01,455 : INFO : adding document #2030000 to Dictionary(432825 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:01,720 : INFO : adding document #2040000 to Dictionary(433994 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:01,996 : INFO : adding document #2050000 to Dictionary(436053 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:02,256 : INFO : adding document #2060000 to Dictionary(437115 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:02,524 : INFO : adding document #2070000 to Dictionary(438236 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:02,795 : INFO : adding document #2080000 to Dictionary(439512 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:03,064 : INFO : adding document #2090000 to Dictionary(440671 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:03,326 : INFO : adding document #2100000 to Dictionary(442053 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:03,583 : INFO : adding document #2110000 to Dictionary(443098 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:03,857 : INFO : adding document #2120000 to Dictionary(444469 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:04,119 : INFO : adding document #2130000 to Dictionary(445737 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:04,378 : INFO : adding document #2140000 to Dictionary(447128 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:04,636 : INFO : adding document #2150000 to Dictionary(448352 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:06:04,891 : INFO : adding document #2160000 to Dictionary(449397 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:05,175 : INFO : adding document #2170000 to Dictionary(450649 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:05,438 : INFO : adding document #2180000 to Dictionary(451840 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:05,714 : INFO : adding document #2190000 to Dictionary(453020 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 
22:06:05,981 : INFO : adding document #2200000 to Dictionary(454160 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:06,248 : INFO : adding document #2210000 to Dictionary(455302 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:06,532 : INFO : adding document #2220000 to Dictionary(456657 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:06,798 : INFO : adding document #2230000 to Dictionary(457752 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:07,068 : INFO : adding document #2240000 to Dictionary(458938 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:07,343 : INFO : adding document #2250000 to Dictionary(460343 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:07,604 : INFO : adding document #2260000 to Dictionary(461426 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:07,860 : INFO : adding document #2270000 to Dictionary(462407 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...)\n", + "2018-09-11 22:06:07,973 : INFO : built Dictionary(462807 unique tokens: ['pples', 'adib', 'strangers', 'kolayaalee', 'softpoint']...) from 2274338 documents (total 40096354 corpus positions)\n", + "2018-09-11 22:06:09,432 : INFO : collecting all words and their counts\n", + "2018-09-11 22:06:09,433 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types\n", + "2018-09-11 22:06:09,466 : INFO : PROGRESS: at sentence #10000, processed 172808 words, keeping 20088 word types\n", + "2018-09-11 22:06:09,498 : INFO : PROGRESS: at sentence #20000, processed 345955 words, keeping 29692 word types\n", + "2018-09-11 22:06:09,535 : INFO : PROGRESS: at sentence #30000, processed 541552 words, keeping 37971 word types\n", + "2018-09-11 22:06:09,569 : INFO : PROGRESS: at sentence #40000, processed 705233 words, keeping 43930 word types\n", + "2018-09-11 22:06:09,601 : INFO : PROGRESS: at sentence #50000, processed 868029 words, keeping 49340 word types\n", + "2018-09-11 22:06:09,636 : INFO : PROGRESS: at sentence #60000, processed 1051114 words, keeping 54734 word types\n", + "2018-09-11 22:06:09,670 : INFO : PROGRESS: at sentence #70000, processed 1229423 words, keeping 59734 word types\n", + "2018-09-11 22:06:09,706 : INFO : PROGRESS: at sentence #80000, processed 1420566 words, keeping 64698 word types\n", + "2018-09-11 22:06:09,740 : INFO : PROGRESS: at sentence #90000, processed 1587554 words, keeping 68921 word types\n", + "2018-09-11 22:06:09,774 : INFO : PROGRESS: at sentence #100000, processed 1763790 words, keeping 74025 word types\n", + "2018-09-11 22:06:09,809 : INFO : PROGRESS: at sentence #110000, processed 1938499 words, keeping 78063 word types\n", + "2018-09-11 22:06:09,845 : INFO : PROGRESS: at sentence #120000, processed 2124701 words, keeping 81932 word types\n", + "2018-09-11 22:06:09,878 : INFO : PROGRESS: at sentence #130000, processed 2298491 words, keeping 85850 word types\n", + "2018-09-11 22:06:09,917 : INFO : PROGRESS: at sentence #140000, processed 2485247 words, keeping 89489 word types\n", + "2018-09-11 22:06:09,952 : INFO : PROGRESS: at sentence #150000, processed 2659752 words, keeping 93441 word types\n", + "2018-09-11 22:06:09,987 : INFO : PROGRESS: at sentence #160000, processed 
2832892 words, keeping 97166 word types\n", + "2018-09-11 22:06:10,022 : INFO : PROGRESS: at sentence #170000, processed 3005814 words, keeping 100281 word types\n", + "2018-09-11 22:06:10,057 : INFO : PROGRESS: at sentence #180000, processed 3169805 words, keeping 103372 word types\n", + "2018-09-11 22:06:10,094 : INFO : PROGRESS: at sentence #190000, processed 3347859 words, keeping 106627 word types\n", + "2018-09-11 22:06:10,131 : INFO : PROGRESS: at sentence #200000, processed 3527666 words, keeping 110902 word types\n", + "2018-09-11 22:06:10,166 : INFO : PROGRESS: at sentence #210000, processed 3703354 words, keeping 113686 word types\n", + "2018-09-11 22:06:10,202 : INFO : PROGRESS: at sentence #220000, processed 3881882 words, keeping 117110 word types\n", + "2018-09-11 22:06:10,235 : INFO : PROGRESS: at sentence #230000, processed 4050419 words, keeping 119961 word types\n", + "2018-09-11 22:06:10,272 : INFO : PROGRESS: at sentence #240000, processed 4232284 words, keeping 123182 word types\n", + "2018-09-11 22:06:10,306 : INFO : PROGRESS: at sentence #250000, processed 4400084 words, keeping 125952 word types\n", + "2018-09-11 22:06:10,342 : INFO : PROGRESS: at sentence #260000, processed 4582320 words, keeping 128806 word types\n", + "2018-09-11 22:06:10,376 : INFO : PROGRESS: at sentence #270000, processed 4750501 words, keeping 131361 word types\n", + "2018-09-11 22:06:10,411 : INFO : PROGRESS: at sentence #280000, processed 4922559 words, keeping 133942 word types\n", + "2018-09-11 22:06:10,446 : INFO : PROGRESS: at sentence #290000, processed 5090547 words, keeping 136306 word types\n", + "2018-09-11 22:06:10,481 : INFO : PROGRESS: at sentence #300000, processed 5263679 words, keeping 138957 word types\n", + "2018-09-11 22:06:10,518 : INFO : PROGRESS: at sentence #310000, processed 5446459 words, keeping 141490 word types\n", + "2018-09-11 22:06:10,554 : INFO : PROGRESS: at sentence #320000, processed 5623621 words, keeping 144071 word types\n", + "2018-09-11 22:06:10,588 : INFO : PROGRESS: at sentence #330000, processed 5792646 words, keeping 146510 word types\n", + "2018-09-11 22:06:10,622 : INFO : PROGRESS: at sentence #340000, processed 5958987 words, keeping 149053 word types\n", + "2018-09-11 22:06:10,659 : INFO : PROGRESS: at sentence #350000, processed 6151645 words, keeping 151463 word types\n", + "2018-09-11 22:06:10,695 : INFO : PROGRESS: at sentence #360000, processed 6327069 words, keeping 153612 word types\n", + "2018-09-11 22:06:10,730 : INFO : PROGRESS: at sentence #370000, processed 6496792 words, keeping 156234 word types\n", + "2018-09-11 22:06:10,770 : INFO : PROGRESS: at sentence #380000, processed 6704748 words, keeping 158845 word types\n", + "2018-09-11 22:06:10,804 : INFO : PROGRESS: at sentence #390000, processed 6879316 words, keeping 161029 word types\n", + "2018-09-11 22:06:10,837 : INFO : PROGRESS: at sentence #400000, processed 7045482 words, keeping 163444 word types\n", + "2018-09-11 22:06:10,873 : INFO : PROGRESS: at sentence #410000, processed 7230856 words, keeping 165551 word types\n", + "2018-09-11 22:06:10,907 : INFO : PROGRESS: at sentence #420000, processed 7407466 words, keeping 167864 word types\n", + "2018-09-11 22:06:10,942 : INFO : PROGRESS: at sentence #430000, processed 7589188 words, keeping 169982 word types\n", + "2018-09-11 22:06:10,977 : INFO : PROGRESS: at sentence #440000, processed 7773096 words, keeping 172106 word types\n", + "2018-09-11 22:06:11,009 : INFO : PROGRESS: at sentence #450000, processed 7932149 words, 
keeping 174128 word types\n", + "2018-09-11 22:06:11,047 : INFO : PROGRESS: at sentence #460000, processed 8098234 words, keeping 176267 word types\n", + "2018-09-11 22:06:11,082 : INFO : PROGRESS: at sentence #470000, processed 8272686 words, keeping 178429 word types\n", + "2018-09-11 22:06:11,116 : INFO : PROGRESS: at sentence #480000, processed 8450596 words, keeping 180738 word types\n", + "2018-09-11 22:06:11,152 : INFO : PROGRESS: at sentence #490000, processed 8626881 words, keeping 182982 word types\n", + "2018-09-11 22:06:11,187 : INFO : PROGRESS: at sentence #500000, processed 8803988 words, keeping 184754 word types\n", + "2018-09-11 22:06:11,223 : INFO : PROGRESS: at sentence #510000, processed 8988004 words, keeping 187327 word types\n", + "2018-09-11 22:06:11,259 : INFO : PROGRESS: at sentence #520000, processed 9169435 words, keeping 189327 word types\n", + "2018-09-11 22:06:11,293 : INFO : PROGRESS: at sentence #530000, processed 9338537 words, keeping 191219 word types\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:06:11,327 : INFO : PROGRESS: at sentence #540000, processed 9513704 words, keeping 193182 word types\n", + "2018-09-11 22:06:11,364 : INFO : PROGRESS: at sentence #550000, processed 9700882 words, keeping 195951 word types\n", + "2018-09-11 22:06:11,401 : INFO : PROGRESS: at sentence #560000, processed 9892043 words, keeping 197956 word types\n", + "2018-09-11 22:06:11,439 : INFO : PROGRESS: at sentence #570000, processed 10082223 words, keeping 200145 word types\n", + "2018-09-11 22:06:11,474 : INFO : PROGRESS: at sentence #580000, processed 10249508 words, keeping 201859 word types\n", + "2018-09-11 22:06:11,507 : INFO : PROGRESS: at sentence #590000, processed 10413550 words, keeping 203724 word types\n", + "2018-09-11 22:06:11,541 : INFO : PROGRESS: at sentence #600000, processed 10583886 words, keeping 205607 word types\n", + "2018-09-11 22:06:11,577 : INFO : PROGRESS: at sentence #610000, processed 10761502 words, keeping 207387 word types\n", + "2018-09-11 22:06:11,612 : INFO : PROGRESS: at sentence #620000, processed 10937476 words, keeping 209246 word types\n", + "2018-09-11 22:06:11,645 : INFO : PROGRESS: at sentence #630000, processed 11103087 words, keeping 211094 word types\n", + "2018-09-11 22:06:11,679 : INFO : PROGRESS: at sentence #640000, processed 11271558 words, keeping 212963 word types\n", + "2018-09-11 22:06:11,712 : INFO : PROGRESS: at sentence #650000, processed 11438866 words, keeping 214666 word types\n", + "2018-09-11 22:06:11,747 : INFO : PROGRESS: at sentence #660000, processed 11616418 words, keeping 216409 word types\n", + "2018-09-11 22:06:11,783 : INFO : PROGRESS: at sentence #670000, processed 11798489 words, keeping 218264 word types\n", + "2018-09-11 22:06:11,818 : INFO : PROGRESS: at sentence #680000, processed 11970418 words, keeping 220129 word types\n", + "2018-09-11 22:06:11,858 : INFO : PROGRESS: at sentence #690000, processed 12175811 words, keeping 222075 word types\n", + "2018-09-11 22:06:11,892 : INFO : PROGRESS: at sentence #700000, processed 12343559 words, keeping 223880 word types\n", + "2018-09-11 22:06:11,934 : INFO : PROGRESS: at sentence #710000, processed 12565565 words, keeping 225982 word types\n", + "2018-09-11 22:06:11,968 : INFO : PROGRESS: at sentence #720000, processed 12736259 words, keeping 227672 word types\n", + "2018-09-11 22:06:12,004 : INFO : PROGRESS: at sentence #730000, processed 12910946 words, keeping 229371 word types\n", + "2018-09-11 
22:06:12,039 : INFO : PROGRESS: at sentence #740000, processed 13086533 words, keeping 231078 word types\n", + "2018-09-11 22:06:12,072 : INFO : PROGRESS: at sentence #750000, processed 13252162 words, keeping 232982 word types\n", + "2018-09-11 22:06:12,108 : INFO : PROGRESS: at sentence #760000, processed 13430188 words, keeping 234746 word types\n", + "2018-09-11 22:06:12,142 : INFO : PROGRESS: at sentence #770000, processed 13599380 words, keeping 236494 word types\n", + "2018-09-11 22:06:12,175 : INFO : PROGRESS: at sentence #780000, processed 13761922 words, keeping 238199 word types\n", + "2018-09-11 22:06:12,212 : INFO : PROGRESS: at sentence #790000, processed 13939964 words, keeping 240021 word types\n", + "2018-09-11 22:06:12,254 : INFO : PROGRESS: at sentence #800000, processed 14171975 words, keeping 242280 word types\n", + "2018-09-11 22:06:12,291 : INFO : PROGRESS: at sentence #810000, processed 14355425 words, keeping 244318 word types\n", + "2018-09-11 22:06:12,327 : INFO : PROGRESS: at sentence #820000, processed 14535275 words, keeping 246133 word types\n", + "2018-09-11 22:06:12,363 : INFO : PROGRESS: at sentence #830000, processed 14712909 words, keeping 247703 word types\n", + "2018-09-11 22:06:12,398 : INFO : PROGRESS: at sentence #840000, processed 14884617 words, keeping 249458 word types\n", + "2018-09-11 22:06:12,433 : INFO : PROGRESS: at sentence #850000, processed 15062312 words, keeping 251278 word types\n", + "2018-09-11 22:06:12,469 : INFO : PROGRESS: at sentence #860000, processed 15240611 words, keeping 252971 word types\n", + "2018-09-11 22:06:12,506 : INFO : PROGRESS: at sentence #870000, processed 15425624 words, keeping 254789 word types\n", + "2018-09-11 22:06:12,541 : INFO : PROGRESS: at sentence #880000, processed 15599153 words, keeping 256483 word types\n", + "2018-09-11 22:06:12,575 : INFO : PROGRESS: at sentence #890000, processed 15766709 words, keeping 258416 word types\n", + "2018-09-11 22:06:12,611 : INFO : PROGRESS: at sentence #900000, processed 15946022 words, keeping 260098 word types\n", + "2018-09-11 22:06:12,644 : INFO : PROGRESS: at sentence #910000, processed 16109571 words, keeping 261700 word types\n", + "2018-09-11 22:06:12,679 : INFO : PROGRESS: at sentence #920000, processed 16285569 words, keeping 263313 word types\n", + "2018-09-11 22:06:12,714 : INFO : PROGRESS: at sentence #930000, processed 16459265 words, keeping 264839 word types\n", + "2018-09-11 22:06:12,749 : INFO : PROGRESS: at sentence #940000, processed 16630795 words, keeping 266327 word types\n", + "2018-09-11 22:06:12,784 : INFO : PROGRESS: at sentence #950000, processed 16809469 words, keeping 267891 word types\n", + "2018-09-11 22:06:12,821 : INFO : PROGRESS: at sentence #960000, processed 16991225 words, keeping 270437 word types\n", + "2018-09-11 22:06:12,857 : INFO : PROGRESS: at sentence #970000, processed 17166201 words, keeping 272420 word types\n", + "2018-09-11 22:06:12,891 : INFO : PROGRESS: at sentence #980000, processed 17337011 words, keeping 274058 word types\n", + "2018-09-11 22:06:12,928 : INFO : PROGRESS: at sentence #990000, processed 17518793 words, keeping 275579 word types\n", + "2018-09-11 22:06:12,963 : INFO : PROGRESS: at sentence #1000000, processed 17695697 words, keeping 277402 word types\n", + "2018-09-11 22:06:12,999 : INFO : PROGRESS: at sentence #1010000, processed 17875525 words, keeping 279035 word types\n", + "2018-09-11 22:06:13,035 : INFO : PROGRESS: at sentence #1020000, processed 18055144 words, keeping 280584 word 
types\n", + "2018-09-11 22:06:13,070 : INFO : PROGRESS: at sentence #1030000, processed 18231637 words, keeping 282206 word types\n", + "2018-09-11 22:06:13,104 : INFO : PROGRESS: at sentence #1040000, processed 18398878 words, keeping 283570 word types\n", + "2018-09-11 22:06:13,141 : INFO : PROGRESS: at sentence #1050000, processed 18584353 words, keeping 285112 word types\n", + "2018-09-11 22:06:13,175 : INFO : PROGRESS: at sentence #1060000, processed 18750269 words, keeping 286666 word types\n", + "2018-09-11 22:06:13,211 : INFO : PROGRESS: at sentence #1070000, processed 18929960 words, keeping 288122 word types\n", + "2018-09-11 22:06:13,246 : INFO : PROGRESS: at sentence #1080000, processed 19102851 words, keeping 289489 word types\n", + "2018-09-11 22:06:13,282 : INFO : PROGRESS: at sentence #1090000, processed 19278476 words, keeping 291139 word types\n", + "2018-09-11 22:06:13,319 : INFO : PROGRESS: at sentence #1100000, processed 19463665 words, keeping 293838 word types\n", + "2018-09-11 22:06:13,354 : INFO : PROGRESS: at sentence #1110000, processed 19635307 words, keeping 295273 word types\n", + "2018-09-11 22:06:13,389 : INFO : PROGRESS: at sentence #1120000, processed 19812865 words, keeping 296816 word types\n", + "2018-09-11 22:06:13,424 : INFO : PROGRESS: at sentence #1130000, processed 19983578 words, keeping 298552 word types\n", + "2018-09-11 22:06:13,449 : INFO : PROGRESS: at sentence #1140000, processed 20106292 words, keeping 299628 word types\n", + "2018-09-11 22:06:13,484 : INFO : PROGRESS: at sentence #1150000, processed 20281010 words, keeping 301139 word types\n", + "2018-09-11 22:06:13,521 : INFO : PROGRESS: at sentence #1160000, processed 20463992 words, keeping 302566 word types\n", + "2018-09-11 22:06:13,557 : INFO : PROGRESS: at sentence #1170000, processed 20639845 words, keeping 304039 word types\n", + "2018-09-11 22:06:13,593 : INFO : PROGRESS: at sentence #1180000, processed 20809920 words, keeping 305503 word types\n", + "2018-09-11 22:06:13,628 : INFO : PROGRESS: at sentence #1190000, processed 20984247 words, keeping 307005 word types\n", + "2018-09-11 22:06:13,664 : INFO : PROGRESS: at sentence #1200000, processed 21163937 words, keeping 308842 word types\n", + "2018-09-11 22:06:13,702 : INFO : PROGRESS: at sentence #1210000, processed 21353983 words, keeping 310414 word types\n", + "2018-09-11 22:06:13,739 : INFO : PROGRESS: at sentence #1220000, processed 21534830 words, keeping 312012 word types\n", + "2018-09-11 22:06:13,774 : INFO : PROGRESS: at sentence #1230000, processed 21709272 words, keeping 313850 word types\n", + "2018-09-11 22:06:13,812 : INFO : PROGRESS: at sentence #1240000, processed 21894484 words, keeping 315829 word types\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:06:13,847 : INFO : PROGRESS: at sentence #1250000, processed 22068690 words, keeping 317188 word types\n", + "2018-09-11 22:06:13,882 : INFO : PROGRESS: at sentence #1260000, processed 22244101 words, keeping 318577 word types\n", + "2018-09-11 22:06:13,915 : INFO : PROGRESS: at sentence #1270000, processed 22407248 words, keeping 320245 word types\n", + "2018-09-11 22:06:13,953 : INFO : PROGRESS: at sentence #1280000, processed 22594585 words, keeping 321715 word types\n", + "2018-09-11 22:06:13,989 : INFO : PROGRESS: at sentence #1290000, processed 22771530 words, keeping 323216 word types\n", + "2018-09-11 22:06:14,027 : INFO : PROGRESS: at sentence #1300000, processed 22963365 words, keeping 324767 word types\n", + 
"2018-09-11 22:06:14,061 : INFO : PROGRESS: at sentence #1310000, processed 23129072 words, keeping 326386 word types\n", + "2018-09-11 22:06:14,107 : INFO : PROGRESS: at sentence #1320000, processed 23362428 words, keeping 329383 word types\n", + "2018-09-11 22:06:14,140 : INFO : PROGRESS: at sentence #1330000, processed 23523119 words, keeping 330810 word types\n", + "2018-09-11 22:06:14,175 : INFO : PROGRESS: at sentence #1340000, processed 23697659 words, keeping 332299 word types\n", + "2018-09-11 22:06:14,210 : INFO : PROGRESS: at sentence #1350000, processed 23867127 words, keeping 333664 word types\n", + "2018-09-11 22:06:14,246 : INFO : PROGRESS: at sentence #1360000, processed 24046933 words, keeping 335153 word types\n", + "2018-09-11 22:06:14,279 : INFO : PROGRESS: at sentence #1370000, processed 24206322 words, keeping 336962 word types\n", + "2018-09-11 22:06:14,316 : INFO : PROGRESS: at sentence #1380000, processed 24383841 words, keeping 338540 word types\n", + "2018-09-11 22:06:14,353 : INFO : PROGRESS: at sentence #1390000, processed 24567114 words, keeping 339974 word types\n", + "2018-09-11 22:06:14,387 : INFO : PROGRESS: at sentence #1400000, processed 24733374 words, keeping 341332 word types\n", + "2018-09-11 22:06:14,423 : INFO : PROGRESS: at sentence #1410000, processed 24911546 words, keeping 342864 word types\n", + "2018-09-11 22:06:14,458 : INFO : PROGRESS: at sentence #1420000, processed 25081231 words, keeping 344362 word types\n", + "2018-09-11 22:06:14,491 : INFO : PROGRESS: at sentence #1430000, processed 25241099 words, keeping 345627 word types\n", + "2018-09-11 22:06:14,526 : INFO : PROGRESS: at sentence #1440000, processed 25416659 words, keeping 346909 word types\n", + "2018-09-11 22:06:14,560 : INFO : PROGRESS: at sentence #1450000, processed 25584691 words, keeping 348275 word types\n", + "2018-09-11 22:06:14,614 : INFO : PROGRESS: at sentence #1460000, processed 25763232 words, keeping 349755 word types\n", + "2018-09-11 22:06:14,651 : INFO : PROGRESS: at sentence #1470000, processed 25943095 words, keeping 351025 word types\n", + "2018-09-11 22:06:14,687 : INFO : PROGRESS: at sentence #1480000, processed 26119119 words, keeping 352258 word types\n", + "2018-09-11 22:06:14,722 : INFO : PROGRESS: at sentence #1490000, processed 26285945 words, keeping 353503 word types\n", + "2018-09-11 22:06:14,756 : INFO : PROGRESS: at sentence #1500000, processed 26454584 words, keeping 354943 word types\n", + "2018-09-11 22:06:14,794 : INFO : PROGRESS: at sentence #1510000, processed 26637032 words, keeping 356295 word types\n", + "2018-09-11 22:06:14,827 : INFO : PROGRESS: at sentence #1520000, processed 26799446 words, keeping 357459 word types\n", + "2018-09-11 22:06:14,863 : INFO : PROGRESS: at sentence #1530000, processed 26978249 words, keeping 358666 word types\n", + "2018-09-11 22:06:14,900 : INFO : PROGRESS: at sentence #1540000, processed 27153165 words, keeping 359986 word types\n", + "2018-09-11 22:06:14,936 : INFO : PROGRESS: at sentence #1550000, processed 27328146 words, keeping 361326 word types\n", + "2018-09-11 22:06:14,975 : INFO : PROGRESS: at sentence #1560000, processed 27519824 words, keeping 362609 word types\n", + "2018-09-11 22:06:15,010 : INFO : PROGRESS: at sentence #1570000, processed 27694120 words, keeping 363808 word types\n", + "2018-09-11 22:06:15,049 : INFO : PROGRESS: at sentence #1580000, processed 27882692 words, keeping 365172 word types\n", + "2018-09-11 22:06:15,088 : INFO : PROGRESS: at sentence #1590000, processed 
28078298 words, keeping 366433 word types\n", + "2018-09-11 22:06:15,125 : INFO : PROGRESS: at sentence #1600000, processed 28259932 words, keeping 367968 word types\n", + "2018-09-11 22:06:15,160 : INFO : PROGRESS: at sentence #1610000, processed 28425483 words, keeping 369421 word types\n", + "2018-09-11 22:06:15,195 : INFO : PROGRESS: at sentence #1620000, processed 28595513 words, keeping 371631 word types\n", + "2018-09-11 22:06:15,231 : INFO : PROGRESS: at sentence #1630000, processed 28769505 words, keeping 372956 word types\n", + "2018-09-11 22:06:15,265 : INFO : PROGRESS: at sentence #1640000, processed 28939090 words, keeping 374282 word types\n", + "2018-09-11 22:06:15,303 : INFO : PROGRESS: at sentence #1650000, processed 29125521 words, keeping 375746 word types\n", + "2018-09-11 22:06:15,339 : INFO : PROGRESS: at sentence #1660000, processed 29299976 words, keeping 377073 word types\n", + "2018-09-11 22:06:15,375 : INFO : PROGRESS: at sentence #1670000, processed 29475570 words, keeping 378393 word types\n", + "2018-09-11 22:06:15,410 : INFO : PROGRESS: at sentence #1680000, processed 29645432 words, keeping 379812 word types\n", + "2018-09-11 22:06:15,445 : INFO : PROGRESS: at sentence #1690000, processed 29818315 words, keeping 380895 word types\n", + "2018-09-11 22:06:15,483 : INFO : PROGRESS: at sentence #1700000, processed 30005971 words, keeping 384739 word types\n", + "2018-09-11 22:06:15,519 : INFO : PROGRESS: at sentence #1710000, processed 30176935 words, keeping 386066 word types\n", + "2018-09-11 22:06:15,555 : INFO : PROGRESS: at sentence #1720000, processed 30353224 words, keeping 387270 word types\n", + "2018-09-11 22:06:15,590 : INFO : PROGRESS: at sentence #1730000, processed 30524099 words, keeping 388385 word types\n", + "2018-09-11 22:06:15,625 : INFO : PROGRESS: at sentence #1740000, processed 30694784 words, keeping 389687 word types\n", + "2018-09-11 22:06:15,660 : INFO : PROGRESS: at sentence #1750000, processed 30865134 words, keeping 390955 word types\n", + "2018-09-11 22:06:15,696 : INFO : PROGRESS: at sentence #1760000, processed 31036964 words, keeping 392540 word types\n", + "2018-09-11 22:06:15,733 : INFO : PROGRESS: at sentence #1770000, processed 31209787 words, keeping 393838 word types\n", + "2018-09-11 22:06:15,768 : INFO : PROGRESS: at sentence #1780000, processed 31381693 words, keeping 395032 word types\n", + "2018-09-11 22:06:15,804 : INFO : PROGRESS: at sentence #1790000, processed 31554124 words, keeping 396178 word types\n", + "2018-09-11 22:06:15,841 : INFO : PROGRESS: at sentence #1800000, processed 31729616 words, keeping 401637 word types\n", + "2018-09-11 22:06:15,876 : INFO : PROGRESS: at sentence #1810000, processed 31902990 words, keeping 402961 word types\n", + "2018-09-11 22:06:15,913 : INFO : PROGRESS: at sentence #1820000, processed 32081329 words, keeping 404423 word types\n", + "2018-09-11 22:06:15,948 : INFO : PROGRESS: at sentence #1830000, processed 32252980 words, keeping 405685 word types\n", + "2018-09-11 22:06:15,983 : INFO : PROGRESS: at sentence #1840000, processed 32421414 words, keeping 406830 word types\n", + "2018-09-11 22:06:16,017 : INFO : PROGRESS: at sentence #1850000, processed 32585345 words, keeping 408042 word types\n", + "2018-09-11 22:06:16,051 : INFO : PROGRESS: at sentence #1860000, processed 32748405 words, keeping 409402 word types\n", + "2018-09-11 22:06:16,087 : INFO : PROGRESS: at sentence #1870000, processed 32926235 words, keeping 410413 word types\n", + "2018-09-11 22:06:16,122 : INFO : 
PROGRESS: at sentence #1880000, processed 33090777 words, keeping 411819 word types\n", + "2018-09-11 22:06:16,158 : INFO : PROGRESS: at sentence #1890000, processed 33269427 words, keeping 412945 word types\n", + "2018-09-11 22:06:16,196 : INFO : PROGRESS: at sentence #1900000, processed 33450367 words, keeping 414272 word types\n", + "2018-09-11 22:06:16,231 : INFO : PROGRESS: at sentence #1910000, processed 33618588 words, keeping 415361 word types\n", + "2018-09-11 22:06:16,268 : INFO : PROGRESS: at sentence #1920000, processed 33803241 words, keeping 416731 word types\n", + "2018-09-11 22:06:16,306 : INFO : PROGRESS: at sentence #1930000, processed 33985967 words, keeping 419310 word types\n", + "2018-09-11 22:06:16,343 : INFO : PROGRESS: at sentence #1940000, processed 34169107 words, keeping 421794 word types\n", + "2018-09-11 22:06:16,382 : INFO : PROGRESS: at sentence #1950000, processed 34356523 words, keeping 423125 word types\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:06:16,419 : INFO : PROGRESS: at sentence #1960000, processed 34532003 words, keeping 424191 word types\n", + "2018-09-11 22:06:16,455 : INFO : PROGRESS: at sentence #1970000, processed 34704604 words, keeping 425372 word types\n", + "2018-09-11 22:06:16,493 : INFO : PROGRESS: at sentence #1980000, processed 34886895 words, keeping 426641 word types\n", + "2018-09-11 22:06:16,528 : INFO : PROGRESS: at sentence #1990000, processed 35059892 words, keeping 427732 word types\n", + "2018-09-11 22:06:16,565 : INFO : PROGRESS: at sentence #2000000, processed 35237154 words, keeping 428904 word types\n", + "2018-09-11 22:06:16,601 : INFO : PROGRESS: at sentence #2010000, processed 35409658 words, keeping 429960 word types\n", + "2018-09-11 22:06:16,639 : INFO : PROGRESS: at sentence #2020000, processed 35599655 words, keeping 431271 word types\n", + "2018-09-11 22:06:16,677 : INFO : PROGRESS: at sentence #2030000, processed 35788909 words, keeping 432825 word types\n", + "2018-09-11 22:06:16,713 : INFO : PROGRESS: at sentence #2040000, processed 35960123 words, keeping 433994 word types\n", + "2018-09-11 22:06:16,751 : INFO : PROGRESS: at sentence #2050000, processed 36145529 words, keeping 436053 word types\n", + "2018-09-11 22:06:16,786 : INFO : PROGRESS: at sentence #2060000, processed 36317031 words, keeping 437115 word types\n", + "2018-09-11 22:06:16,823 : INFO : PROGRESS: at sentence #2070000, processed 36494774 words, keeping 438236 word types\n", + "2018-09-11 22:06:16,860 : INFO : PROGRESS: at sentence #2080000, processed 36675860 words, keeping 439512 word types\n", + "2018-09-11 22:06:16,896 : INFO : PROGRESS: at sentence #2090000, processed 36852776 words, keeping 440671 word types\n", + "2018-09-11 22:06:16,932 : INFO : PROGRESS: at sentence #2100000, processed 37026875 words, keeping 442053 word types\n", + "2018-09-11 22:06:16,967 : INFO : PROGRESS: at sentence #2110000, processed 37193537 words, keeping 443098 word types\n", + "2018-09-11 22:06:17,004 : INFO : PROGRESS: at sentence #2120000, processed 37376099 words, keeping 444469 word types\n", + "2018-09-11 22:06:17,039 : INFO : PROGRESS: at sentence #2130000, processed 37548033 words, keeping 445737 word types\n", + "2018-09-11 22:06:17,075 : INFO : PROGRESS: at sentence #2140000, processed 37717063 words, keeping 447128 word types\n", + "2018-09-11 22:06:17,110 : INFO : PROGRESS: at sentence #2150000, processed 37884639 words, keeping 448352 word types\n", + "2018-09-11 22:06:17,144 : INFO : PROGRESS: 
at sentence #2160000, processed 38049158 words, keeping 449397 word types\n", + "2018-09-11 22:06:17,183 : INFO : PROGRESS: at sentence #2170000, processed 38241528 words, keeping 450649 word types\n", + "2018-09-11 22:06:17,219 : INFO : PROGRESS: at sentence #2180000, processed 38413267 words, keeping 451840 word types\n", + "2018-09-11 22:06:17,257 : INFO : PROGRESS: at sentence #2190000, processed 38599896 words, keeping 453020 word types\n", + "2018-09-11 22:06:17,293 : INFO : PROGRESS: at sentence #2200000, processed 38774142 words, keeping 454160 word types\n", + "2018-09-11 22:06:17,329 : INFO : PROGRESS: at sentence #2210000, processed 38952307 words, keeping 455302 word types\n", + "2018-09-11 22:06:17,369 : INFO : PROGRESS: at sentence #2220000, processed 39148420 words, keeping 456657 word types\n", + "2018-09-11 22:06:17,405 : INFO : PROGRESS: at sentence #2230000, processed 39323321 words, keeping 457752 word types\n", + "2018-09-11 22:06:17,442 : INFO : PROGRESS: at sentence #2240000, processed 39503997 words, keeping 458938 word types\n", + "2018-09-11 22:06:17,479 : INFO : PROGRESS: at sentence #2250000, processed 39687270 words, keeping 460343 word types\n", + "2018-09-11 22:06:17,515 : INFO : PROGRESS: at sentence #2260000, processed 39858294 words, keeping 461426 word types\n", + "2018-09-11 22:06:17,549 : INFO : PROGRESS: at sentence #2270000, processed 40024958 words, keeping 462407 word types\n", + "2018-09-11 22:06:17,564 : INFO : collected 462807 word types from a corpus of 40096354 raw words and 2274338 sentences\n", + "2018-09-11 22:06:17,565 : INFO : Loading a fresh vocabulary\n", + "2018-09-11 22:06:18,002 : INFO : effective_min_count=5 retains 104360 unique words (22% of original 462807, drops 358447)\n", + "2018-09-11 22:06:18,003 : INFO : effective_min_count=5 leaves 39565168 word corpus (98% of original 40096354, drops 531186)\n", + "2018-09-11 22:06:18,454 : INFO : deleting the raw counts dictionary of 462807 items\n", + "2018-09-11 22:06:18,474 : INFO : sample=0.001 downsamples 22 most-common words\n", + "2018-09-11 22:06:18,475 : INFO : downsampling leaves estimated 38552993 word corpus (97.4% of prior 39565168)\n", + "2018-09-11 22:06:18,907 : INFO : estimated required memory for 104360 words and 300 dimensions: 302644000 bytes\n", + "2018-09-11 22:06:18,908 : INFO : resetting layer weights\n", + "2018-09-11 22:06:21,082 : INFO : training model with 32 workers on 104360 vocabulary and 300 features, using sg=0 hs=0 sample=0.001 negative=5 window=5\n", + "2018-09-11 22:06:22,116 : INFO : EPOCH 1 - PROGRESS: at 2.77% examples, 1054327 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:06:23,137 : INFO : EPOCH 1 - PROGRESS: at 5.94% examples, 1132099 words/s, in_qsize 59, out_qsize 4\n", + "2018-09-11 22:06:24,161 : INFO : EPOCH 1 - PROGRESS: at 9.06% examples, 1144466 words/s, in_qsize 60, out_qsize 3\n", + "2018-09-11 22:06:25,184 : INFO : EPOCH 1 - PROGRESS: at 12.20% examples, 1151277 words/s, in_qsize 61, out_qsize 2\n", + "2018-09-11 22:06:26,202 : INFO : EPOCH 1 - PROGRESS: at 15.42% examples, 1160248 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:06:27,203 : INFO : EPOCH 1 - PROGRESS: at 18.32% examples, 1158724 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:06:28,211 : INFO : EPOCH 1 - PROGRESS: at 21.40% examples, 1160369 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:06:29,229 : INFO : EPOCH 1 - PROGRESS: at 24.45% examples, 1162368 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:06:30,238 : INFO : EPOCH 1 - 
PROGRESS: at 27.52% examples, 1162801 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:06:31,251 : INFO : EPOCH 1 - PROGRESS: at 30.58% examples, 1163452 words/s, in_qsize 64, out_qsize 2\n", + "2018-09-11 22:06:32,256 : INFO : EPOCH 1 - PROGRESS: at 33.41% examples, 1158946 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:06:33,264 : INFO : EPOCH 1 - PROGRESS: at 36.60% examples, 1164780 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:06:34,272 : INFO : EPOCH 1 - PROGRESS: at 39.65% examples, 1164187 words/s, in_qsize 63, out_qsize 1\n", + "2018-09-11 22:06:35,284 : INFO : EPOCH 1 - PROGRESS: at 42.71% examples, 1163589 words/s, in_qsize 60, out_qsize 3\n", + "2018-09-11 22:06:36,295 : INFO : EPOCH 1 - PROGRESS: at 45.79% examples, 1164858 words/s, in_qsize 61, out_qsize 2\n", + "2018-09-11 22:06:37,298 : INFO : EPOCH 1 - PROGRESS: at 48.91% examples, 1167233 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:06:38,311 : INFO : EPOCH 1 - PROGRESS: at 52.14% examples, 1166844 words/s, in_qsize 64, out_qsize 3\n", + "2018-09-11 22:06:39,315 : INFO : EPOCH 1 - PROGRESS: at 55.13% examples, 1167767 words/s, in_qsize 63, out_qsize 4\n", + "2018-09-11 22:06:40,333 : INFO : EPOCH 1 - PROGRESS: at 58.19% examples, 1169670 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:06:41,358 : INFO : EPOCH 1 - PROGRESS: at 61.23% examples, 1167139 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:06:42,367 : INFO : EPOCH 1 - PROGRESS: at 64.42% examples, 1168112 words/s, in_qsize 61, out_qsize 2\n", + "2018-09-11 22:06:43,371 : INFO : EPOCH 1 - PROGRESS: at 67.42% examples, 1166679 words/s, in_qsize 61, out_qsize 2\n", + "2018-09-11 22:06:44,372 : INFO : EPOCH 1 - PROGRESS: at 70.45% examples, 1168340 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:06:45,380 : INFO : EPOCH 1 - PROGRESS: at 73.49% examples, 1167585 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:06:46,384 : INFO : EPOCH 1 - PROGRESS: at 76.69% examples, 1169309 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:06:47,406 : INFO : EPOCH 1 - PROGRESS: at 79.79% examples, 1168502 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:06:48,407 : INFO : EPOCH 1 - PROGRESS: at 82.83% examples, 1166740 words/s, in_qsize 64, out_qsize 2\n", + "2018-09-11 22:06:49,415 : INFO : EPOCH 1 - PROGRESS: at 85.97% examples, 1168616 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:06:50,440 : INFO : EPOCH 1 - PROGRESS: at 89.02% examples, 1168592 words/s, in_qsize 57, out_qsize 6\n", + "2018-09-11 22:06:51,451 : INFO : EPOCH 1 - PROGRESS: at 92.10% examples, 1169073 words/s, in_qsize 62, out_qsize 1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:06:52,466 : INFO : EPOCH 1 - PROGRESS: at 95.26% examples, 1169387 words/s, in_qsize 59, out_qsize 4\n", + "2018-09-11 22:06:53,495 : INFO : EPOCH 1 - PROGRESS: at 98.42% examples, 1170643 words/s, in_qsize 61, out_qsize 2\n", + "2018-09-11 22:06:53,702 : INFO : worker thread finished; awaiting finish of 31 more threads\n", + "2018-09-11 22:06:53,709 : INFO : worker thread finished; awaiting finish of 30 more threads\n", + "2018-09-11 22:06:53,734 : INFO : worker thread finished; awaiting finish of 29 more threads\n", + "2018-09-11 22:06:53,747 : INFO : worker thread finished; awaiting finish of 28 more threads\n", + "2018-09-11 22:06:53,763 : INFO : worker thread finished; awaiting finish of 27 more threads\n", + "2018-09-11 22:06:53,772 : INFO : worker thread finished; awaiting finish of 26 more threads\n", + 
"2018-09-11 22:06:53,790 : INFO : worker thread finished; awaiting finish of 25 more threads\n", + "2018-09-11 22:06:53,801 : INFO : worker thread finished; awaiting finish of 24 more threads\n", + "2018-09-11 22:06:53,818 : INFO : worker thread finished; awaiting finish of 23 more threads\n", + "2018-09-11 22:06:53,820 : INFO : worker thread finished; awaiting finish of 22 more threads\n", + "2018-09-11 22:06:53,822 : INFO : worker thread finished; awaiting finish of 21 more threads\n", + "2018-09-11 22:06:53,841 : INFO : worker thread finished; awaiting finish of 20 more threads\n", + "2018-09-11 22:06:53,844 : INFO : worker thread finished; awaiting finish of 19 more threads\n", + "2018-09-11 22:06:53,845 : INFO : worker thread finished; awaiting finish of 18 more threads\n", + "2018-09-11 22:06:53,848 : INFO : worker thread finished; awaiting finish of 17 more threads\n", + "2018-09-11 22:06:53,849 : INFO : worker thread finished; awaiting finish of 16 more threads\n", + "2018-09-11 22:06:53,850 : INFO : worker thread finished; awaiting finish of 15 more threads\n", + "2018-09-11 22:06:53,851 : INFO : worker thread finished; awaiting finish of 14 more threads\n", + "2018-09-11 22:06:53,851 : INFO : worker thread finished; awaiting finish of 13 more threads\n", + "2018-09-11 22:06:53,852 : INFO : worker thread finished; awaiting finish of 12 more threads\n", + "2018-09-11 22:06:53,853 : INFO : worker thread finished; awaiting finish of 11 more threads\n", + "2018-09-11 22:06:53,861 : INFO : worker thread finished; awaiting finish of 10 more threads\n", + "2018-09-11 22:06:53,864 : INFO : worker thread finished; awaiting finish of 9 more threads\n", + "2018-09-11 22:06:53,865 : INFO : worker thread finished; awaiting finish of 8 more threads\n", + "2018-09-11 22:06:53,866 : INFO : worker thread finished; awaiting finish of 7 more threads\n", + "2018-09-11 22:06:53,867 : INFO : worker thread finished; awaiting finish of 6 more threads\n", + "2018-09-11 22:06:53,873 : INFO : worker thread finished; awaiting finish of 5 more threads\n", + "2018-09-11 22:06:53,882 : INFO : worker thread finished; awaiting finish of 4 more threads\n", + "2018-09-11 22:06:53,883 : INFO : worker thread finished; awaiting finish of 3 more threads\n", + "2018-09-11 22:06:53,886 : INFO : worker thread finished; awaiting finish of 2 more threads\n", + "2018-09-11 22:06:53,893 : INFO : worker thread finished; awaiting finish of 1 more threads\n", + "2018-09-11 22:06:53,894 : INFO : worker thread finished; awaiting finish of 0 more threads\n", + "2018-09-11 22:06:53,894 : INFO : EPOCH - 1 : training on 40096354 raw words (38515351 effective words) took 32.8s, 1174692 effective words/s\n", + "2018-09-11 22:06:54,917 : INFO : EPOCH 2 - PROGRESS: at 2.19% examples, 831263 words/s, in_qsize 59, out_qsize 4\n", + "2018-09-11 22:06:55,937 : INFO : EPOCH 2 - PROGRESS: at 5.26% examples, 1008453 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:06:56,952 : INFO : EPOCH 2 - PROGRESS: at 8.43% examples, 1068452 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:06:57,961 : INFO : EPOCH 2 - PROGRESS: at 11.42% examples, 1086419 words/s, in_qsize 64, out_qsize 2\n", + "2018-09-11 22:06:58,972 : INFO : EPOCH 2 - PROGRESS: at 14.52% examples, 1102434 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:06:59,973 : INFO : EPOCH 2 - PROGRESS: at 17.64% examples, 1123054 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:00,979 : INFO : EPOCH 2 - PROGRESS: at 20.77% examples, 1131615 words/s, in_qsize 64, out_qsize 
0\n", + "2018-09-11 22:07:01,988 : INFO : EPOCH 2 - PROGRESS: at 23.77% examples, 1136191 words/s, in_qsize 61, out_qsize 2\n", + "2018-09-11 22:07:03,017 : INFO : EPOCH 2 - PROGRESS: at 26.77% examples, 1134892 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:04,030 : INFO : EPOCH 2 - PROGRESS: at 29.88% examples, 1137879 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:07:05,077 : INFO : EPOCH 2 - PROGRESS: at 32.94% examples, 1141295 words/s, in_qsize 62, out_qsize 2\n", + "2018-09-11 22:07:06,088 : INFO : EPOCH 2 - PROGRESS: at 36.01% examples, 1144342 words/s, in_qsize 59, out_qsize 4\n", + "2018-09-11 22:07:07,097 : INFO : EPOCH 2 - PROGRESS: at 39.27% examples, 1151864 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:08,113 : INFO : EPOCH 2 - PROGRESS: at 42.20% examples, 1148367 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:09,115 : INFO : EPOCH 2 - PROGRESS: at 45.21% examples, 1149403 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:10,126 : INFO : EPOCH 2 - PROGRESS: at 48.32% examples, 1151635 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:07:11,127 : INFO : EPOCH 2 - PROGRESS: at 51.45% examples, 1151275 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:12,174 : INFO : EPOCH 2 - PROGRESS: at 54.49% examples, 1150789 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:13,184 : INFO : EPOCH 2 - PROGRESS: at 57.50% examples, 1152143 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:07:14,190 : INFO : EPOCH 2 - PROGRESS: at 60.51% examples, 1151577 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:15,197 : INFO : EPOCH 2 - PROGRESS: at 63.57% examples, 1151112 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:16,206 : INFO : EPOCH 2 - PROGRESS: at 66.70% examples, 1152746 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:07:17,209 : INFO : EPOCH 2 - PROGRESS: at 69.78% examples, 1155798 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:07:18,216 : INFO : EPOCH 2 - PROGRESS: at 72.82% examples, 1155564 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:07:19,252 : INFO : EPOCH 2 - PROGRESS: at 75.85% examples, 1154069 words/s, in_qsize 60, out_qsize 4\n", + "2018-09-11 22:07:20,261 : INFO : EPOCH 2 - PROGRESS: at 79.08% examples, 1155871 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:21,266 : INFO : EPOCH 2 - PROGRESS: at 82.09% examples, 1154374 words/s, in_qsize 64, out_qsize 5\n", + "2018-09-11 22:07:22,278 : INFO : EPOCH 2 - PROGRESS: at 85.29% examples, 1156834 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:23,294 : INFO : EPOCH 2 - PROGRESS: at 88.27% examples, 1156316 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:24,304 : INFO : EPOCH 2 - PROGRESS: at 91.18% examples, 1155664 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:25,315 : INFO : EPOCH 2 - PROGRESS: at 94.39% examples, 1157140 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:26,329 : INFO : EPOCH 2 - PROGRESS: at 97.44% examples, 1157852 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:26,915 : INFO : worker thread finished; awaiting finish of 31 more threads\n", + "2018-09-11 22:07:26,933 : INFO : worker thread finished; awaiting finish of 30 more threads\n", + "2018-09-11 22:07:26,942 : INFO : worker thread finished; awaiting finish of 29 more threads\n", + "2018-09-11 22:07:26,952 : INFO : worker thread finished; awaiting finish of 28 more threads\n", + "2018-09-11 22:07:26,954 : INFO : worker thread finished; awaiting finish of 27 more threads\n", + 
"2018-09-11 22:07:26,988 : INFO : worker thread finished; awaiting finish of 26 more threads\n", + "2018-09-11 22:07:26,997 : INFO : worker thread finished; awaiting finish of 25 more threads\n", + "2018-09-11 22:07:27,012 : INFO : worker thread finished; awaiting finish of 24 more threads\n", + "2018-09-11 22:07:27,021 : INFO : worker thread finished; awaiting finish of 23 more threads\n", + "2018-09-11 22:07:27,023 : INFO : worker thread finished; awaiting finish of 22 more threads\n", + "2018-09-11 22:07:27,024 : INFO : worker thread finished; awaiting finish of 21 more threads\n", + "2018-09-11 22:07:27,025 : INFO : worker thread finished; awaiting finish of 20 more threads\n", + "2018-09-11 22:07:27,028 : INFO : worker thread finished; awaiting finish of 19 more threads\n", + "2018-09-11 22:07:27,075 : INFO : worker thread finished; awaiting finish of 18 more threads\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:07:27,078 : INFO : worker thread finished; awaiting finish of 17 more threads\n", + "2018-09-11 22:07:27,081 : INFO : worker thread finished; awaiting finish of 16 more threads\n", + "2018-09-11 22:07:27,082 : INFO : worker thread finished; awaiting finish of 15 more threads\n", + "2018-09-11 22:07:27,083 : INFO : worker thread finished; awaiting finish of 14 more threads\n", + "2018-09-11 22:07:27,084 : INFO : worker thread finished; awaiting finish of 13 more threads\n", + "2018-09-11 22:07:27,085 : INFO : worker thread finished; awaiting finish of 12 more threads\n", + "2018-09-11 22:07:27,086 : INFO : worker thread finished; awaiting finish of 11 more threads\n", + "2018-09-11 22:07:27,087 : INFO : worker thread finished; awaiting finish of 10 more threads\n", + "2018-09-11 22:07:27,090 : INFO : worker thread finished; awaiting finish of 9 more threads\n", + "2018-09-11 22:07:27,091 : INFO : worker thread finished; awaiting finish of 8 more threads\n", + "2018-09-11 22:07:27,093 : INFO : worker thread finished; awaiting finish of 7 more threads\n", + "2018-09-11 22:07:27,096 : INFO : worker thread finished; awaiting finish of 6 more threads\n", + "2018-09-11 22:07:27,103 : INFO : worker thread finished; awaiting finish of 5 more threads\n", + "2018-09-11 22:07:27,106 : INFO : worker thread finished; awaiting finish of 4 more threads\n", + "2018-09-11 22:07:27,108 : INFO : worker thread finished; awaiting finish of 3 more threads\n", + "2018-09-11 22:07:27,114 : INFO : worker thread finished; awaiting finish of 2 more threads\n", + "2018-09-11 22:07:27,117 : INFO : worker thread finished; awaiting finish of 1 more threads\n", + "2018-09-11 22:07:27,120 : INFO : worker thread finished; awaiting finish of 0 more threads\n", + "2018-09-11 22:07:27,121 : INFO : EPOCH - 2 : training on 40096354 raw words (38515107 effective words) took 33.2s, 1159858 effective words/s\n", + "2018-09-11 22:07:28,134 : INFO : EPOCH 3 - PROGRESS: at 2.84% examples, 1091427 words/s, in_qsize 64, out_qsize 1\n", + "2018-09-11 22:07:29,140 : INFO : EPOCH 3 - PROGRESS: at 5.83% examples, 1126105 words/s, in_qsize 61, out_qsize 2\n", + "2018-09-11 22:07:30,140 : INFO : EPOCH 3 - PROGRESS: at 8.87% examples, 1139897 words/s, in_qsize 59, out_qsize 4\n", + "2018-09-11 22:07:31,141 : INFO : EPOCH 3 - PROGRESS: at 11.93% examples, 1144843 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:32,153 : INFO : EPOCH 3 - PROGRESS: at 15.09% examples, 1154518 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:33,161 : INFO : EPOCH 3 - PROGRESS: at 18.06% 
examples, 1154389 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:34,163 : INFO : EPOCH 3 - PROGRESS: at 21.18% examples, 1160369 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:07:35,167 : INFO : EPOCH 3 - PROGRESS: at 24.16% examples, 1160758 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:36,176 : INFO : EPOCH 3 - PROGRESS: at 27.09% examples, 1156190 words/s, in_qsize 62, out_qsize 2\n", + "2018-09-11 22:07:37,190 : INFO : EPOCH 3 - PROGRESS: at 30.20% examples, 1159436 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:38,190 : INFO : EPOCH 3 - PROGRESS: at 33.09% examples, 1157313 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:07:39,214 : INFO : EPOCH 3 - PROGRESS: at 36.09% examples, 1156185 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:40,220 : INFO : EPOCH 3 - PROGRESS: at 39.26% examples, 1160293 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:41,230 : INFO : EPOCH 3 - PROGRESS: at 42.35% examples, 1160724 words/s, in_qsize 63, out_qsize 2\n", + "2018-09-11 22:07:42,251 : INFO : EPOCH 3 - PROGRESS: at 45.21% examples, 1155701 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:43,289 : INFO : EPOCH 3 - PROGRESS: at 48.37% examples, 1156735 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:07:44,338 : INFO : EPOCH 3 - PROGRESS: at 51.70% examples, 1157332 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:07:45,350 : INFO : EPOCH 3 - PROGRESS: at 54.64% examples, 1156108 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:07:46,351 : INFO : EPOCH 3 - PROGRESS: at 57.68% examples, 1159184 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:47,374 : INFO : EPOCH 3 - PROGRESS: at 60.77% examples, 1158756 words/s, in_qsize 61, out_qsize 2\n", + "2018-09-11 22:07:48,385 : INFO : EPOCH 3 - PROGRESS: at 64.07% examples, 1161796 words/s, in_qsize 64, out_qsize 2\n", + "2018-09-11 22:07:49,389 : INFO : EPOCH 3 - PROGRESS: at 67.32% examples, 1164943 words/s, in_qsize 64, out_qsize 1\n", + "2018-09-11 22:07:50,400 : INFO : EPOCH 3 - PROGRESS: at 70.40% examples, 1167007 words/s, in_qsize 64, out_qsize 2\n", + "2018-09-11 22:07:51,400 : INFO : EPOCH 3 - PROGRESS: at 73.40% examples, 1166276 words/s, in_qsize 63, out_qsize 1\n", + "2018-09-11 22:07:52,402 : INFO : EPOCH 3 - PROGRESS: at 76.48% examples, 1166269 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:53,424 : INFO : EPOCH 3 - PROGRESS: at 79.50% examples, 1164484 words/s, in_qsize 63, out_qsize 1\n", + "2018-09-11 22:07:54,428 : INFO : EPOCH 3 - PROGRESS: at 82.69% examples, 1164838 words/s, in_qsize 60, out_qsize 3\n", + "2018-09-11 22:07:55,442 : INFO : EPOCH 3 - PROGRESS: at 85.59% examples, 1163133 words/s, in_qsize 61, out_qsize 2\n", + "2018-09-11 22:07:56,458 : INFO : EPOCH 3 - PROGRESS: at 88.61% examples, 1163006 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:07:57,476 : INFO : EPOCH 3 - PROGRESS: at 91.46% examples, 1161198 words/s, in_qsize 61, out_qsize 2\n", + "2018-09-11 22:07:58,476 : INFO : EPOCH 3 - PROGRESS: at 94.65% examples, 1162323 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:07:59,480 : INFO : EPOCH 3 - PROGRESS: at 97.64% examples, 1162634 words/s, in_qsize 64, out_qsize 3\n", + "2018-09-11 22:07:59,966 : INFO : worker thread finished; awaiting finish of 31 more threads\n", + "2018-09-11 22:07:59,968 : INFO : worker thread finished; awaiting finish of 30 more threads\n", + "2018-09-11 22:07:59,977 : INFO : worker thread finished; awaiting finish of 29 more threads\n", + "2018-09-11 22:07:59,994 : 
INFO : worker thread finished; awaiting finish of 28 more threads\n", + "2018-09-11 22:07:59,995 : INFO : worker thread finished; awaiting finish of 27 more threads\n", + "2018-09-11 22:08:00,011 : INFO : worker thread finished; awaiting finish of 26 more threads\n", + "2018-09-11 22:08:00,014 : INFO : worker thread finished; awaiting finish of 25 more threads\n", + "2018-09-11 22:08:00,025 : INFO : worker thread finished; awaiting finish of 24 more threads\n", + "2018-09-11 22:08:00,026 : INFO : worker thread finished; awaiting finish of 23 more threads\n", + "2018-09-11 22:08:00,034 : INFO : worker thread finished; awaiting finish of 22 more threads\n", + "2018-09-11 22:08:00,035 : INFO : worker thread finished; awaiting finish of 21 more threads\n", + "2018-09-11 22:08:00,044 : INFO : worker thread finished; awaiting finish of 20 more threads\n", + "2018-09-11 22:08:00,046 : INFO : worker thread finished; awaiting finish of 19 more threads\n", + "2018-09-11 22:08:00,070 : INFO : worker thread finished; awaiting finish of 18 more threads\n", + "2018-09-11 22:08:00,072 : INFO : worker thread finished; awaiting finish of 17 more threads\n", + "2018-09-11 22:08:00,073 : INFO : worker thread finished; awaiting finish of 16 more threads\n", + "2018-09-11 22:08:00,075 : INFO : worker thread finished; awaiting finish of 15 more threads\n", + "2018-09-11 22:08:00,078 : INFO : worker thread finished; awaiting finish of 14 more threads\n", + "2018-09-11 22:08:00,079 : INFO : worker thread finished; awaiting finish of 13 more threads\n", + "2018-09-11 22:08:00,080 : INFO : worker thread finished; awaiting finish of 12 more threads\n", + "2018-09-11 22:08:00,086 : INFO : worker thread finished; awaiting finish of 11 more threads\n", + "2018-09-11 22:08:00,093 : INFO : worker thread finished; awaiting finish of 10 more threads\n", + "2018-09-11 22:08:00,094 : INFO : worker thread finished; awaiting finish of 9 more threads\n", + "2018-09-11 22:08:00,099 : INFO : worker thread finished; awaiting finish of 8 more threads\n", + "2018-09-11 22:08:00,107 : INFO : worker thread finished; awaiting finish of 7 more threads\n", + "2018-09-11 22:08:00,107 : INFO : worker thread finished; awaiting finish of 6 more threads\n", + "2018-09-11 22:08:00,113 : INFO : worker thread finished; awaiting finish of 5 more threads\n", + "2018-09-11 22:08:00,114 : INFO : worker thread finished; awaiting finish of 4 more threads\n", + "2018-09-11 22:08:00,118 : INFO : worker thread finished; awaiting finish of 3 more threads\n", + "2018-09-11 22:08:00,119 : INFO : worker thread finished; awaiting finish of 2 more threads\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:08:00,120 : INFO : worker thread finished; awaiting finish of 1 more threads\n", + "2018-09-11 22:08:00,121 : INFO : worker thread finished; awaiting finish of 0 more threads\n", + "2018-09-11 22:08:00,122 : INFO : EPOCH - 3 : training on 40096354 raw words (38514587 effective words) took 33.0s, 1167509 effective words/s\n", + "2018-09-11 22:08:01,143 : INFO : EPOCH 4 - PROGRESS: at 2.68% examples, 1023881 words/s, in_qsize 60, out_qsize 3\n", + "2018-09-11 22:08:02,151 : INFO : EPOCH 4 - PROGRESS: at 5.81% examples, 1119736 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:08:03,166 : INFO : EPOCH 4 - PROGRESS: at 8.66% examples, 1105341 words/s, in_qsize 59, out_qsize 4\n", + "2018-09-11 22:08:04,183 : INFO : EPOCH 4 - PROGRESS: at 11.91% examples, 1132745 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 
22:08:05,197 : INFO : EPOCH 4 - PROGRESS: at 14.98% examples, 1136795 words/s, in_qsize 61, out_qsize 2\n", + "2018-09-11 22:08:06,200 : INFO : EPOCH 4 - PROGRESS: at 18.05% examples, 1150014 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:08:07,235 : INFO : EPOCH 4 - PROGRESS: at 21.12% examples, 1147221 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:08:08,250 : INFO : EPOCH 4 - PROGRESS: at 24.27% examples, 1156023 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:08:09,252 : INFO : EPOCH 4 - PROGRESS: at 27.21% examples, 1152772 words/s, in_qsize 64, out_qsize 2\n", + "2018-09-11 22:08:10,276 : INFO : EPOCH 4 - PROGRESS: at 30.37% examples, 1155566 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:08:11,292 : INFO : EPOCH 4 - PROGRESS: at 33.42% examples, 1158765 words/s, in_qsize 64, out_qsize 4\n", + "2018-09-11 22:08:12,299 : INFO : EPOCH 4 - PROGRESS: at 36.67% examples, 1166979 words/s, in_qsize 63, out_qsize 1\n", + "2018-09-11 22:08:13,316 : INFO : EPOCH 4 - PROGRESS: at 39.62% examples, 1162619 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:08:14,344 : INFO : EPOCH 4 - PROGRESS: at 42.78% examples, 1163480 words/s, in_qsize 61, out_qsize 2\n", + "2018-09-11 22:08:15,360 : INFO : EPOCH 4 - PROGRESS: at 45.93% examples, 1166345 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:08:16,382 : INFO : EPOCH 4 - PROGRESS: at 48.97% examples, 1164876 words/s, in_qsize 64, out_qsize 1\n", + "2018-09-11 22:08:17,382 : INFO : EPOCH 4 - PROGRESS: at 52.14% examples, 1164396 words/s, in_qsize 61, out_qsize 2\n", + "2018-09-11 22:08:18,384 : INFO : EPOCH 4 - PROGRESS: at 55.22% examples, 1167636 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:08:19,387 : INFO : EPOCH 4 - PROGRESS: at 58.15% examples, 1167516 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:08:20,398 : INFO : EPOCH 4 - PROGRESS: at 61.27% examples, 1167315 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:08:21,419 : INFO : EPOCH 4 - PROGRESS: at 64.51% examples, 1168528 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:08:22,430 : INFO : EPOCH 4 - PROGRESS: at 67.65% examples, 1168856 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:08:23,438 : INFO : EPOCH 4 - PROGRESS: at 70.61% examples, 1169227 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:08:24,447 : INFO : EPOCH 4 - PROGRESS: at 73.73% examples, 1169553 words/s, in_qsize 61, out_qsize 2\n", + "2018-09-11 22:08:25,448 : INFO : EPOCH 4 - PROGRESS: at 76.81% examples, 1169504 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:08:26,456 : INFO : EPOCH 4 - PROGRESS: at 79.93% examples, 1169634 words/s, in_qsize 61, out_qsize 2\n", + "2018-09-11 22:08:27,489 : INFO : EPOCH 4 - PROGRESS: at 83.12% examples, 1168905 words/s, in_qsize 64, out_qsize 1\n", + "2018-09-11 22:08:28,508 : INFO : EPOCH 4 - PROGRESS: at 86.31% examples, 1170581 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:08:29,535 : INFO : EPOCH 4 - PROGRESS: at 89.24% examples, 1169130 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:08:30,544 : INFO : EPOCH 4 - PROGRESS: at 92.51% examples, 1171842 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:08:31,544 : INFO : EPOCH 4 - PROGRESS: at 95.58% examples, 1171726 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:08:32,550 : INFO : EPOCH 4 - PROGRESS: at 98.35% examples, 1169284 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:08:32,781 : INFO : worker thread finished; awaiting finish of 31 more threads\n", + "2018-09-11 22:08:32,790 : INFO : worker thread 
finished; awaiting finish of 30 more threads\n", + "2018-09-11 22:08:32,806 : INFO : worker thread finished; awaiting finish of 29 more threads\n", + "2018-09-11 22:08:32,820 : INFO : worker thread finished; awaiting finish of 28 more threads\n", + "2018-09-11 22:08:32,822 : INFO : worker thread finished; awaiting finish of 27 more threads\n", + "2018-09-11 22:08:32,843 : INFO : worker thread finished; awaiting finish of 26 more threads\n", + "2018-09-11 22:08:32,872 : INFO : worker thread finished; awaiting finish of 25 more threads\n", + "2018-09-11 22:08:32,874 : INFO : worker thread finished; awaiting finish of 24 more threads\n", + "2018-09-11 22:08:32,881 : INFO : worker thread finished; awaiting finish of 23 more threads\n", + "2018-09-11 22:08:32,884 : INFO : worker thread finished; awaiting finish of 22 more threads\n", + "2018-09-11 22:08:32,895 : INFO : worker thread finished; awaiting finish of 21 more threads\n", + "2018-09-11 22:08:32,896 : INFO : worker thread finished; awaiting finish of 20 more threads\n", + "2018-09-11 22:08:32,898 : INFO : worker thread finished; awaiting finish of 19 more threads\n", + "2018-09-11 22:08:32,899 : INFO : worker thread finished; awaiting finish of 18 more threads\n", + "2018-09-11 22:08:32,900 : INFO : worker thread finished; awaiting finish of 17 more threads\n", + "2018-09-11 22:08:32,917 : INFO : worker thread finished; awaiting finish of 16 more threads\n", + "2018-09-11 22:08:32,937 : INFO : worker thread finished; awaiting finish of 15 more threads\n", + "2018-09-11 22:08:32,940 : INFO : worker thread finished; awaiting finish of 14 more threads\n", + "2018-09-11 22:08:32,945 : INFO : worker thread finished; awaiting finish of 13 more threads\n", + "2018-09-11 22:08:32,946 : INFO : worker thread finished; awaiting finish of 12 more threads\n", + "2018-09-11 22:08:32,948 : INFO : worker thread finished; awaiting finish of 11 more threads\n", + "2018-09-11 22:08:32,949 : INFO : worker thread finished; awaiting finish of 10 more threads\n", + "2018-09-11 22:08:32,950 : INFO : worker thread finished; awaiting finish of 9 more threads\n", + "2018-09-11 22:08:32,951 : INFO : worker thread finished; awaiting finish of 8 more threads\n", + "2018-09-11 22:08:32,952 : INFO : worker thread finished; awaiting finish of 7 more threads\n", + "2018-09-11 22:08:32,953 : INFO : worker thread finished; awaiting finish of 6 more threads\n", + "2018-09-11 22:08:32,956 : INFO : worker thread finished; awaiting finish of 5 more threads\n", + "2018-09-11 22:08:32,962 : INFO : worker thread finished; awaiting finish of 4 more threads\n", + "2018-09-11 22:08:32,964 : INFO : worker thread finished; awaiting finish of 3 more threads\n", + "2018-09-11 22:08:32,965 : INFO : worker thread finished; awaiting finish of 2 more threads\n", + "2018-09-11 22:08:32,968 : INFO : worker thread finished; awaiting finish of 1 more threads\n", + "2018-09-11 22:08:32,976 : INFO : worker thread finished; awaiting finish of 0 more threads\n", + "2018-09-11 22:08:32,976 : INFO : EPOCH - 4 : training on 40096354 raw words (38515500 effective words) took 32.8s, 1172993 effective words/s\n", + "2018-09-11 22:08:33,997 : INFO : EPOCH 5 - PROGRESS: at 2.31% examples, 880484 words/s, in_qsize 60, out_qsize 3\n", + "2018-09-11 22:08:35,001 : INFO : EPOCH 5 - PROGRESS: at 5.38% examples, 1036308 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:08:36,033 : INFO : EPOCH 5 - PROGRESS: at 8.47% examples, 1075264 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:08:37,049 : INFO : 
EPOCH 5 - PROGRESS: at 11.54% examples, 1096428 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:08:38,063 : INFO : EPOCH 5 - PROGRESS: at 14.74% examples, 1115318 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:08:39,070 : INFO : EPOCH 5 - PROGRESS: at 17.81% examples, 1128500 words/s, in_qsize 62, out_qsize 2\n", + "2018-09-11 22:08:40,089 : INFO : EPOCH 5 - PROGRESS: at 20.87% examples, 1133857 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:08:41,098 : INFO : EPOCH 5 - PROGRESS: at 24.04% examples, 1143894 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:08:42,122 : INFO : EPOCH 5 - PROGRESS: at 26.94% examples, 1139273 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:08:43,140 : INFO : EPOCH 5 - PROGRESS: at 29.93% examples, 1136454 words/s, in_qsize 62, out_qsize 1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:08:44,141 : INFO : EPOCH 5 - PROGRESS: at 32.88% examples, 1141240 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:08:45,160 : INFO : EPOCH 5 - PROGRESS: at 35.69% examples, 1134927 words/s, in_qsize 59, out_qsize 4\n", + "2018-09-11 22:08:46,160 : INFO : EPOCH 5 - PROGRESS: at 38.80% examples, 1140299 words/s, in_qsize 59, out_qsize 4\n", + "2018-09-11 22:08:47,164 : INFO : EPOCH 5 - PROGRESS: at 41.93% examples, 1143375 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:08:48,174 : INFO : EPOCH 5 - PROGRESS: at 44.94% examples, 1144228 words/s, in_qsize 60, out_qsize 3\n", + "2018-09-11 22:08:49,178 : INFO : EPOCH 5 - PROGRESS: at 47.99% examples, 1145502 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:08:50,178 : INFO : EPOCH 5 - PROGRESS: at 51.25% examples, 1148862 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:08:51,196 : INFO : EPOCH 5 - PROGRESS: at 54.17% examples, 1147181 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:08:52,216 : INFO : EPOCH 5 - PROGRESS: at 57.12% examples, 1146640 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:08:53,268 : INFO : EPOCH 5 - PROGRESS: at 60.22% examples, 1146560 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:08:54,275 : INFO : EPOCH 5 - PROGRESS: at 63.48% examples, 1149945 words/s, in_qsize 64, out_qsize 1\n", + "2018-09-11 22:08:55,283 : INFO : EPOCH 5 - PROGRESS: at 66.66% examples, 1152147 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:08:56,284 : INFO : EPOCH 5 - PROGRESS: at 69.65% examples, 1153625 words/s, in_qsize 64, out_qsize 0\n", + "2018-09-11 22:08:57,286 : INFO : EPOCH 5 - PROGRESS: at 72.78% examples, 1155327 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:08:58,291 : INFO : EPOCH 5 - PROGRESS: at 75.81% examples, 1155265 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:08:59,296 : INFO : EPOCH 5 - PROGRESS: at 78.95% examples, 1156094 words/s, in_qsize 64, out_qsize 1\n", + "2018-09-11 22:09:00,306 : INFO : EPOCH 5 - PROGRESS: at 82.01% examples, 1155082 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:09:01,310 : INFO : EPOCH 5 - PROGRESS: at 85.02% examples, 1154772 words/s, in_qsize 62, out_qsize 1\n", + "2018-09-11 22:09:02,311 : INFO : EPOCH 5 - PROGRESS: at 88.04% examples, 1155913 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:09:03,314 : INFO : EPOCH 5 - PROGRESS: at 91.04% examples, 1156476 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:09:04,321 : INFO : EPOCH 5 - PROGRESS: at 93.91% examples, 1154441 words/s, in_qsize 63, out_qsize 0\n", + "2018-09-11 22:09:05,327 : INFO : EPOCH 5 - PROGRESS: at 97.03% examples, 1155561 words/s, in_qsize 
63, out_qsize 0\n", + "2018-09-11 22:09:06,032 : INFO : worker thread finished; awaiting finish of 31 more threads\n", + "2018-09-11 22:09:06,052 : INFO : worker thread finished; awaiting finish of 30 more threads\n", + "2018-09-11 22:09:06,054 : INFO : worker thread finished; awaiting finish of 29 more threads\n", + "2018-09-11 22:09:06,074 : INFO : worker thread finished; awaiting finish of 28 more threads\n", + "2018-09-11 22:09:06,087 : INFO : worker thread finished; awaiting finish of 27 more threads\n", + "2018-09-11 22:09:06,089 : INFO : worker thread finished; awaiting finish of 26 more threads\n", + "2018-09-11 22:09:06,090 : INFO : worker thread finished; awaiting finish of 25 more threads\n", + "2018-09-11 22:09:06,091 : INFO : worker thread finished; awaiting finish of 24 more threads\n", + "2018-09-11 22:09:06,101 : INFO : worker thread finished; awaiting finish of 23 more threads\n", + "2018-09-11 22:09:06,103 : INFO : worker thread finished; awaiting finish of 22 more threads\n", + "2018-09-11 22:09:06,104 : INFO : worker thread finished; awaiting finish of 21 more threads\n", + "2018-09-11 22:09:06,112 : INFO : worker thread finished; awaiting finish of 20 more threads\n", + "2018-09-11 22:09:06,123 : INFO : worker thread finished; awaiting finish of 19 more threads\n", + "2018-09-11 22:09:06,131 : INFO : worker thread finished; awaiting finish of 18 more threads\n", + "2018-09-11 22:09:06,157 : INFO : worker thread finished; awaiting finish of 17 more threads\n", + "2018-09-11 22:09:06,169 : INFO : worker thread finished; awaiting finish of 16 more threads\n", + "2018-09-11 22:09:06,172 : INFO : worker thread finished; awaiting finish of 15 more threads\n", + "2018-09-11 22:09:06,173 : INFO : worker thread finished; awaiting finish of 14 more threads\n", + "2018-09-11 22:09:06,173 : INFO : worker thread finished; awaiting finish of 13 more threads\n", + "2018-09-11 22:09:06,174 : INFO : worker thread finished; awaiting finish of 12 more threads\n", + "2018-09-11 22:09:06,175 : INFO : worker thread finished; awaiting finish of 11 more threads\n", + "2018-09-11 22:09:06,176 : INFO : worker thread finished; awaiting finish of 10 more threads\n", + "2018-09-11 22:09:06,176 : INFO : worker thread finished; awaiting finish of 9 more threads\n", + "2018-09-11 22:09:06,177 : INFO : worker thread finished; awaiting finish of 8 more threads\n", + "2018-09-11 22:09:06,180 : INFO : worker thread finished; awaiting finish of 7 more threads\n", + "2018-09-11 22:09:06,181 : INFO : worker thread finished; awaiting finish of 6 more threads\n", + "2018-09-11 22:09:06,197 : INFO : worker thread finished; awaiting finish of 5 more threads\n", + "2018-09-11 22:09:06,199 : INFO : worker thread finished; awaiting finish of 4 more threads\n", + "2018-09-11 22:09:06,200 : INFO : worker thread finished; awaiting finish of 3 more threads\n", + "2018-09-11 22:09:06,205 : INFO : worker thread finished; awaiting finish of 2 more threads\n", + "2018-09-11 22:09:06,207 : INFO : worker thread finished; awaiting finish of 1 more threads\n", + "2018-09-11 22:09:06,211 : INFO : worker thread finished; awaiting finish of 0 more threads\n", + "2018-09-11 22:09:06,211 : INFO : EPOCH - 5 : training on 40096354 raw words (38515593 effective words) took 33.2s, 1159566 effective words/s\n", + "2018-09-11 22:09:06,212 : INFO : training on a 200481770 raw words (192576138 effective words) took 165.1s, 1166216 effective words/s\n", + "2018-09-11 22:09:06,637 : INFO : constructing a sparse term similarity matrix using 
\n", + "2018-09-11 22:09:06,657 : INFO : iterating over columns in tf-idf order\n", + "2018-09-11 22:09:07,847 : INFO : PROGRESS: at 0.00% columns (1 / 462807, 0.000216% density, 0.000216% projected density)\n", + "2018-09-11 22:09:08,179 : INFO : precomputing L2-norms of word weight vectors\n", + "/mnt/storage/home/novotny/.virtualenvs/gensim/lib/python3.4/site-packages/gensim/matutils.py:738: FutureWarning: Conversion of the second argument of issubdtype from `int` to `np.signedinteger` is deprecated. In future, it will be treated as `np.int64 == np.dtype(int).type`.\n", " if np.issubdtype(vec.dtype, np.int):\n", - "2018-07-13 01:22:14,011 : INFO : PROGRESS: at 0.22% columns (1001 / 462807, 0.000216% density, 0.000345% projected density)\n", - "2018-07-13 01:35:28,586 : INFO : PROGRESS: at 99.83% columns (462001 / 462807, 0.003633% density, 0.003639% projected density)\n", - "2018-07-13 01:35:34,481 : INFO : constructed a sparse term similarity matrix with 0.003657% density\n" + "2018-09-11 22:09:08,491 : INFO : PROGRESS: at 0.22% columns (1001 / 462807, 0.000216% density, 0.000345% projected density)\n", + "2018-09-11 22:09:08,532 : INFO : PROGRESS: at 0.43% columns (2001 / 462807, 0.000216% density, 0.000302% projected density)\n", + "2018-09-11 22:09:08,643 : INFO : PROGRESS: at 0.65% columns (3001 / 462807, 0.000217% density, 0.000345% projected density)\n", + "2018-09-11 22:09:08,684 : INFO : PROGRESS: at 0.86% columns (4001 / 462807, 0.000217% density, 0.000324% projected density)\n", + "2018-09-11 22:09:08,775 : INFO : PROGRESS: at 1.08% columns (5001 / 462807, 0.000217% density, 0.000337% projected density)\n", + "2018-09-11 22:09:08,850 : INFO : PROGRESS: at 1.30% columns (6001 / 462807, 0.000218% density, 0.000338% projected density)\n", + "2018-09-11 22:09:08,925 : INFO : PROGRESS: at 1.51% columns (7001 / 462807, 0.000218% density, 0.000339% projected density)\n", + "2018-09-11 22:09:09,015 : INFO : PROGRESS: at 1.73% columns (8001 / 462807, 0.000218% density, 0.000345% projected density)\n", + "2018-09-11 22:09:09,059 : INFO : PROGRESS: at 1.94% columns (9001 / 462807, 0.000218% density, 0.000335% projected density)\n", + "2018-09-11 22:09:09,180 : INFO : PROGRESS: at 2.16% columns (10001 / 462807, 0.000219% density, 0.000349% projected density)\n", + "2018-09-11 22:09:09,207 : INFO : PROGRESS: at 2.38% columns (11001 / 462807, 0.000219% density, 0.000337% projected density)\n", + "2018-09-11 22:09:09,313 : INFO : PROGRESS: at 2.59% columns (12001 / 462807, 0.000219% density, 0.000345% projected density)\n", + "2018-09-11 22:09:09,372 : INFO : PROGRESS: at 2.81% columns (13001 / 462807, 0.000220% density, 0.000342% projected density)\n", + "2018-09-11 22:09:09,416 : INFO : PROGRESS: at 3.03% columns (14001 / 462807, 0.000220% density, 0.000336% projected density)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:09:09,583 : INFO : PROGRESS: at 3.24% columns (15001 / 462807, 0.000221% density, 0.000353% projected density)\n", + "2018-09-11 22:09:09,610 : INFO : PROGRESS: at 3.46% columns (16001 / 462807, 0.000221% density, 0.000345% projected density)\n", + "2018-09-11 22:09:09,771 : INFO : PROGRESS: at 3.67% columns (17001 / 462807, 0.000221% density, 0.000358% projected density)\n", + "2018-09-11 22:09:09,846 : INFO : PROGRESS: at 3.89% columns (18001 / 462807, 0.000222% density, 0.000358% projected density)\n", + "2018-09-11 22:09:09,889 : INFO : PROGRESS: at 4.11% columns (19001 / 462807, 0.000222% density, 0.000352% projected 
density)\n", + "2018-09-11 22:09:09,964 : INFO : PROGRESS: at 4.32% columns (20001 / 462807, 0.000222% density, 0.000352% projected density)\n", + "2018-09-11 22:09:10,039 : INFO : PROGRESS: at 4.54% columns (21001 / 462807, 0.000222% density, 0.000352% projected density)\n", + "2018-09-11 22:09:10,113 : INFO : PROGRESS: at 4.75% columns (22001 / 462807, 0.000223% density, 0.000351% projected density)\n", + "2018-09-11 22:09:10,157 : INFO : PROGRESS: at 4.97% columns (23001 / 462807, 0.000223% density, 0.000347% projected density)\n", + "2018-09-11 22:09:10,278 : INFO : PROGRESS: at 5.19% columns (24001 / 462807, 0.000223% density, 0.000353% projected density)\n", + "2018-09-11 22:09:10,368 : INFO : PROGRESS: at 5.40% columns (25001 / 462807, 0.000224% density, 0.000354% projected density)\n", + "2018-09-11 22:09:10,410 : INFO : PROGRESS: at 5.62% columns (26001 / 462807, 0.000224% density, 0.000350% projected density)\n", + "2018-09-11 22:09:10,485 : INFO : PROGRESS: at 5.83% columns (27001 / 462807, 0.000224% density, 0.000350% projected density)\n", + "2018-09-11 22:09:10,573 : INFO : PROGRESS: at 6.05% columns (28001 / 462807, 0.000224% density, 0.000351% projected density)\n", + "2018-09-11 22:09:10,648 : INFO : PROGRESS: at 6.27% columns (29001 / 462807, 0.000225% density, 0.000351% projected density)\n", + "2018-09-11 22:09:10,675 : INFO : PROGRESS: at 6.48% columns (30001 / 462807, 0.000225% density, 0.000346% projected density)\n", + "2018-09-11 22:09:10,773 : INFO : PROGRESS: at 6.70% columns (31001 / 462807, 0.000225% density, 0.000348% projected density)\n", + "2018-09-11 22:09:10,862 : INFO : PROGRESS: at 6.91% columns (32001 / 462807, 0.000225% density, 0.000349% projected density)\n", + "2018-09-11 22:09:10,905 : INFO : PROGRESS: at 7.13% columns (33001 / 462807, 0.000225% density, 0.000346% projected density)\n", + "2018-09-11 22:09:10,996 : INFO : PROGRESS: at 7.35% columns (34001 / 462807, 0.000226% density, 0.000347% projected density)\n", + "2018-09-11 22:09:11,148 : INFO : PROGRESS: at 7.56% columns (35001 / 462807, 0.000226% density, 0.000353% projected density)\n", + "2018-09-11 22:09:11,339 : INFO : PROGRESS: at 7.78% columns (36001 / 462807, 0.000227% density, 0.000361% projected density)\n", + "2018-09-11 22:09:11,380 : INFO : PROGRESS: at 7.99% columns (37001 / 462807, 0.000227% density, 0.000358% projected density)\n", + "2018-09-11 22:09:11,485 : INFO : PROGRESS: at 8.21% columns (38001 / 462807, 0.000228% density, 0.000360% projected density)\n", + "2018-09-11 22:09:11,544 : INFO : PROGRESS: at 8.43% columns (39001 / 462807, 0.000228% density, 0.000359% projected density)\n", + "2018-09-11 22:09:11,603 : INFO : PROGRESS: at 8.64% columns (40001 / 462807, 0.000228% density, 0.000357% projected density)\n", + "2018-09-11 22:09:11,723 : INFO : PROGRESS: at 8.86% columns (41001 / 462807, 0.000229% density, 0.000360% projected density)\n", + "2018-09-11 22:09:11,811 : INFO : PROGRESS: at 9.08% columns (42001 / 462807, 0.000229% density, 0.000360% projected density)\n", + "2018-09-11 22:09:11,885 : INFO : PROGRESS: at 9.29% columns (43001 / 462807, 0.000229% density, 0.000360% projected density)\n", + "2018-09-11 22:09:11,929 : INFO : PROGRESS: at 9.51% columns (44001 / 462807, 0.000230% density, 0.000358% projected density)\n", + "2018-09-11 22:09:12,019 : INFO : PROGRESS: at 9.72% columns (45001 / 462807, 0.000230% density, 0.000358% projected density)\n", + "2018-09-11 22:09:12,129 : INFO : PROGRESS: at 9.94% columns (46001 / 462807, 0.000230% density, 0.000360% 
projected density)\n", + "2018-09-11 22:09:12,159 : INFO : PROGRESS: at 10.16% columns (47001 / 462807, 0.000230% density, 0.000357% projected density)\n", + "2018-09-11 22:09:12,264 : INFO : PROGRESS: at 10.37% columns (48001 / 462807, 0.000231% density, 0.000358% projected density)\n", + "2018-09-11 22:09:12,292 : INFO : PROGRESS: at 10.59% columns (49001 / 462807, 0.000231% density, 0.000355% projected density)\n", + "2018-09-11 22:09:12,365 : INFO : PROGRESS: at 10.80% columns (50001 / 462807, 0.000231% density, 0.000355% projected density)\n", + "2018-09-11 22:09:12,408 : INFO : PROGRESS: at 11.02% columns (51001 / 462807, 0.000231% density, 0.000353% projected density)\n", + "2018-09-11 22:09:12,484 : INFO : PROGRESS: at 11.24% columns (52001 / 462807, 0.000231% density, 0.000353% projected density)\n", + "2018-09-11 22:09:12,542 : INFO : PROGRESS: at 11.45% columns (53001 / 462807, 0.000232% density, 0.000352% projected density)\n", + "2018-09-11 22:09:12,601 : INFO : PROGRESS: at 11.67% columns (54001 / 462807, 0.000232% density, 0.000351% projected density)\n", + "2018-09-11 22:09:12,676 : INFO : PROGRESS: at 11.88% columns (55001 / 462807, 0.000232% density, 0.000351% projected density)\n", + "2018-09-11 22:09:12,896 : INFO : PROGRESS: at 12.10% columns (56001 / 462807, 0.000233% density, 0.000357% projected density)\n", + "2018-09-11 22:09:12,953 : INFO : PROGRESS: at 12.32% columns (57001 / 462807, 0.000233% density, 0.000356% projected density)\n", + "2018-09-11 22:09:12,997 : INFO : PROGRESS: at 12.53% columns (58001 / 462807, 0.000233% density, 0.000355% projected density)\n", + "2018-09-11 22:09:13,132 : INFO : PROGRESS: at 12.75% columns (59001 / 462807, 0.000234% density, 0.000357% projected density)\n", + "2018-09-11 22:09:13,549 : INFO : PROGRESS: at 12.96% columns (60001 / 462807, 0.000236% density, 0.000371% projected density)\n", + "2018-09-11 22:09:13,654 : INFO : PROGRESS: at 13.18% columns (61001 / 462807, 0.000237% density, 0.000372% projected density)\n", + "2018-09-11 22:09:13,868 : INFO : PROGRESS: at 13.40% columns (62001 / 462807, 0.000238% density, 0.000377% projected density)\n", + "2018-09-11 22:09:13,990 : INFO : PROGRESS: at 13.61% columns (63001 / 462807, 0.000238% density, 0.000378% projected density)\n", + "2018-09-11 22:09:14,065 : INFO : PROGRESS: at 13.83% columns (64001 / 462807, 0.000238% density, 0.000378% projected density)\n", + "2018-09-11 22:09:14,108 : INFO : PROGRESS: at 14.04% columns (65001 / 462807, 0.000239% density, 0.000376% projected density)\n", + "2018-09-11 22:09:14,214 : INFO : PROGRESS: at 14.26% columns (66001 / 462807, 0.000239% density, 0.000376% projected density)\n", + "2018-09-11 22:09:14,257 : INFO : PROGRESS: at 14.48% columns (67001 / 462807, 0.000239% density, 0.000374% projected density)\n", + "2018-09-11 22:09:14,348 : INFO : PROGRESS: at 14.69% columns (68001 / 462807, 0.000239% density, 0.000374% projected density)\n", + "2018-09-11 22:09:14,438 : INFO : PROGRESS: at 14.91% columns (69001 / 462807, 0.000240% density, 0.000375% projected density)\n", + "2018-09-11 22:09:14,512 : INFO : PROGRESS: at 15.13% columns (70001 / 462807, 0.000240% density, 0.000374% projected density)\n", + "2018-09-11 22:09:14,588 : INFO : PROGRESS: at 15.34% columns (71001 / 462807, 0.000240% density, 0.000374% projected density)\n", + "2018-09-11 22:09:14,707 : INFO : PROGRESS: at 15.56% columns (72001 / 462807, 0.000241% density, 0.000375% projected density)\n", + "2018-09-11 22:09:14,766 : INFO : PROGRESS: at 15.77% columns (73001 / 
462807, 0.000241% density, 0.000374% projected density)\n", + "2018-09-11 22:09:14,826 : INFO : PROGRESS: at 15.99% columns (74001 / 462807, 0.000241% density, 0.000373% projected density)\n", + "2018-09-11 22:09:14,993 : INFO : PROGRESS: at 16.21% columns (75001 / 462807, 0.000242% density, 0.000376% projected density)\n", + "2018-09-11 22:09:15,083 : INFO : PROGRESS: at 16.42% columns (76001 / 462807, 0.000242% density, 0.000376% projected density)\n", + "2018-09-11 22:09:15,141 : INFO : PROGRESS: at 16.64% columns (77001 / 462807, 0.000243% density, 0.000375% projected density)\n", + "2018-09-11 22:09:15,185 : INFO : PROGRESS: at 16.85% columns (78001 / 462807, 0.000243% density, 0.000374% projected density)\n", + "2018-09-11 22:09:15,260 : INFO : PROGRESS: at 17.07% columns (79001 / 462807, 0.000243% density, 0.000373% projected density)\n", + "2018-09-11 22:09:15,303 : INFO : PROGRESS: at 17.29% columns (80001 / 462807, 0.000243% density, 0.000372% projected density)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:09:15,378 : INFO : PROGRESS: at 17.50% columns (81001 / 462807, 0.000243% density, 0.000372% projected density)\n", + "2018-09-11 22:09:15,451 : INFO : PROGRESS: at 17.72% columns (82001 / 462807, 0.000244% density, 0.000371% projected density)\n", + "2018-09-11 22:09:15,494 : INFO : PROGRESS: at 17.93% columns (83001 / 462807, 0.000244% density, 0.000370% projected density)\n", + "2018-09-11 22:09:15,522 : INFO : PROGRESS: at 18.15% columns (84001 / 462807, 0.000244% density, 0.000368% projected density)\n", + "2018-09-11 22:09:15,625 : INFO : PROGRESS: at 18.37% columns (85001 / 462807, 0.000244% density, 0.000369% projected density)\n", + "2018-09-11 22:09:15,745 : INFO : PROGRESS: at 18.58% columns (86001 / 462807, 0.000245% density, 0.000370% projected density)\n", + "2018-09-11 22:09:15,788 : INFO : PROGRESS: at 18.80% columns (87001 / 462807, 0.000245% density, 0.000368% projected density)\n", + "2018-09-11 22:09:15,832 : INFO : PROGRESS: at 19.01% columns (88001 / 462807, 0.000245% density, 0.000367% projected density)\n", + "2018-09-11 22:09:15,875 : INFO : PROGRESS: at 19.23% columns (89001 / 462807, 0.000245% density, 0.000366% projected density)\n", + "2018-09-11 22:09:15,903 : INFO : PROGRESS: at 19.45% columns (90001 / 462807, 0.000245% density, 0.000364% projected density)\n", + "2018-09-11 22:09:15,960 : INFO : PROGRESS: at 19.66% columns (91001 / 462807, 0.000245% density, 0.000363% projected density)\n", + "2018-09-11 22:09:16,065 : INFO : PROGRESS: at 19.88% columns (92001 / 462807, 0.000246% density, 0.000364% projected density)\n", + "2018-09-11 22:09:16,109 : INFO : PROGRESS: at 20.09% columns (93001 / 462807, 0.000246% density, 0.000363% projected density)\n", + "2018-09-11 22:09:16,152 : INFO : PROGRESS: at 20.31% columns (94001 / 462807, 0.000246% density, 0.000362% projected density)\n", + "2018-09-11 22:09:16,211 : INFO : PROGRESS: at 20.53% columns (95001 / 462807, 0.000246% density, 0.000361% projected density)\n", + "2018-09-11 22:09:16,285 : INFO : PROGRESS: at 20.74% columns (96001 / 462807, 0.000246% density, 0.000361% projected density)\n", + "2018-09-11 22:09:16,359 : INFO : PROGRESS: at 20.96% columns (97001 / 462807, 0.000246% density, 0.000360% projected density)\n", + "2018-09-11 22:09:16,432 : INFO : PROGRESS: at 21.18% columns (98001 / 462807, 0.000247% density, 0.000360% projected density)\n", + "2018-09-11 22:09:16,523 : INFO : PROGRESS: at 21.39% columns (99001 / 462807, 0.000247% 
density, 0.000361% projected density)\n", + "2018-09-11 22:09:16,582 : INFO : PROGRESS: at 21.61% columns (100001 / 462807, 0.000247% density, 0.000360% projected density)\n", + "2018-09-11 22:09:16,706 : INFO : PROGRESS: at 21.82% columns (101001 / 462807, 0.000248% density, 0.000361% projected density)\n", + "2018-09-11 22:09:16,782 : INFO : PROGRESS: at 22.04% columns (102001 / 462807, 0.000248% density, 0.000361% projected density)\n", + "2018-09-11 22:09:16,809 : INFO : PROGRESS: at 22.26% columns (103001 / 462807, 0.000248% density, 0.000359% projected density)\n", + "2018-09-11 22:09:16,973 : INFO : PROGRESS: at 22.47% columns (104001 / 462807, 0.000249% density, 0.000362% projected density)\n", + "2018-09-11 22:09:17,105 : INFO : PROGRESS: at 22.69% columns (105001 / 462807, 0.000249% density, 0.000363% projected density)\n", + "2018-09-11 22:09:17,183 : INFO : PROGRESS: at 22.90% columns (106001 / 462807, 0.000250% density, 0.000363% projected density)\n", + "2018-09-11 22:09:17,242 : INFO : PROGRESS: at 23.12% columns (107001 / 462807, 0.000250% density, 0.000362% projected density)\n", + "2018-09-11 22:09:17,317 : INFO : PROGRESS: at 23.34% columns (108001 / 462807, 0.000250% density, 0.000362% projected density)\n", + "2018-09-11 22:09:17,391 : INFO : PROGRESS: at 23.55% columns (109001 / 462807, 0.000250% density, 0.000362% projected density)\n", + "2018-09-11 22:09:17,494 : INFO : PROGRESS: at 23.77% columns (110001 / 462807, 0.000251% density, 0.000362% projected density)\n", + "2018-09-11 22:09:17,600 : INFO : PROGRESS: at 23.98% columns (111001 / 462807, 0.000251% density, 0.000363% projected density)\n", + "2018-09-11 22:09:17,675 : INFO : PROGRESS: at 24.20% columns (112001 / 462807, 0.000252% density, 0.000363% projected density)\n", + "2018-09-11 22:09:17,718 : INFO : PROGRESS: at 24.42% columns (113001 / 462807, 0.000252% density, 0.000362% projected density)\n", + "2018-09-11 22:09:17,762 : INFO : PROGRESS: at 24.63% columns (114001 / 462807, 0.000252% density, 0.000361% projected density)\n", + "2018-09-11 22:09:17,897 : INFO : PROGRESS: at 24.85% columns (115001 / 462807, 0.000252% density, 0.000362% projected density)\n", + "2018-09-11 22:09:18,002 : INFO : PROGRESS: at 25.06% columns (116001 / 462807, 0.000253% density, 0.000363% projected density)\n", + "2018-09-11 22:09:18,076 : INFO : PROGRESS: at 25.28% columns (117001 / 462807, 0.000253% density, 0.000363% projected density)\n", + "2018-09-11 22:09:18,104 : INFO : PROGRESS: at 25.50% columns (118001 / 462807, 0.000253% density, 0.000361% projected density)\n", + "2018-09-11 22:09:18,193 : INFO : PROGRESS: at 25.71% columns (119001 / 462807, 0.000253% density, 0.000361% projected density)\n", + "2018-09-11 22:09:18,297 : INFO : PROGRESS: at 25.93% columns (120001 / 462807, 0.000254% density, 0.000362% projected density)\n", + "2018-09-11 22:09:18,340 : INFO : PROGRESS: at 26.15% columns (121001 / 462807, 0.000254% density, 0.000361% projected density)\n", + "2018-09-11 22:09:18,428 : INFO : PROGRESS: at 26.36% columns (122001 / 462807, 0.000254% density, 0.000361% projected density)\n", + "2018-09-11 22:09:18,520 : INFO : PROGRESS: at 26.58% columns (123001 / 462807, 0.000255% density, 0.000361% projected density)\n", + "2018-09-11 22:09:18,653 : INFO : PROGRESS: at 26.79% columns (124001 / 462807, 0.000255% density, 0.000362% projected density)\n", + "2018-09-11 22:09:18,920 : INFO : PROGRESS: at 27.01% columns (125001 / 462807, 0.000257% density, 0.000366% projected density)\n", + "2018-09-11 22:09:19,026 : 
INFO : PROGRESS: at 27.23% columns (126001 / 462807, 0.000257% density, 0.000367% projected density)\n", + "2018-09-11 22:09:19,146 : INFO : PROGRESS: at 27.44% columns (127001 / 462807, 0.000258% density, 0.000367% projected density)\n", + "2018-09-11 22:09:19,250 : INFO : PROGRESS: at 27.66% columns (128001 / 462807, 0.000258% density, 0.000368% projected density)\n", + "2018-09-11 22:09:19,299 : INFO : PROGRESS: at 27.87% columns (129001 / 462807, 0.000258% density, 0.000367% projected density)\n", + "2018-09-11 22:09:19,373 : INFO : PROGRESS: at 28.09% columns (130001 / 462807, 0.000258% density, 0.000367% projected density)\n", + "2018-09-11 22:09:19,415 : INFO : PROGRESS: at 28.31% columns (131001 / 462807, 0.000258% density, 0.000366% projected density)\n", + "2018-09-11 22:09:19,459 : INFO : PROGRESS: at 28.52% columns (132001 / 462807, 0.000259% density, 0.000365% projected density)\n", + "2018-09-11 22:09:19,502 : INFO : PROGRESS: at 28.74% columns (133001 / 462807, 0.000259% density, 0.000364% projected density)\n", + "2018-09-11 22:09:19,639 : INFO : PROGRESS: at 28.95% columns (134001 / 462807, 0.000259% density, 0.000365% projected density)\n", + "2018-09-11 22:09:19,698 : INFO : PROGRESS: at 29.17% columns (135001 / 462807, 0.000259% density, 0.000365% projected density)\n", + "2018-09-11 22:09:19,741 : INFO : PROGRESS: at 29.39% columns (136001 / 462807, 0.000260% density, 0.000364% projected density)\n", + "2018-09-11 22:09:19,814 : INFO : PROGRESS: at 29.60% columns (137001 / 462807, 0.000260% density, 0.000364% projected density)\n", + "2018-09-11 22:09:19,935 : INFO : PROGRESS: at 29.82% columns (138001 / 462807, 0.000260% density, 0.000365% projected density)\n", + "2018-09-11 22:09:20,039 : INFO : PROGRESS: at 30.03% columns (139001 / 462807, 0.000261% density, 0.000365% projected density)\n", + "2018-09-11 22:09:20,129 : INFO : PROGRESS: at 30.25% columns (140001 / 462807, 0.000261% density, 0.000365% projected density)\n", + "2018-09-11 22:09:20,250 : INFO : PROGRESS: at 30.47% columns (141001 / 462807, 0.000262% density, 0.000366% projected density)\n", + "2018-09-11 22:09:20,322 : INFO : PROGRESS: at 30.68% columns (142001 / 462807, 0.000262% density, 0.000366% projected density)\n", + "2018-09-11 22:09:20,365 : INFO : PROGRESS: at 30.90% columns (143001 / 462807, 0.000262% density, 0.000365% projected density)\n", + "2018-09-11 22:09:20,438 : INFO : PROGRESS: at 31.11% columns (144001 / 462807, 0.000262% density, 0.000365% projected density)\n", + "2018-09-11 22:09:20,466 : INFO : PROGRESS: at 31.33% columns (145001 / 462807, 0.000262% density, 0.000364% projected density)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:09:20,523 : INFO : PROGRESS: at 31.55% columns (146001 / 462807, 0.000263% density, 0.000363% projected density)\n", + "2018-09-11 22:09:20,583 : INFO : PROGRESS: at 31.76% columns (147001 / 462807, 0.000263% density, 0.000363% projected density)\n", + "2018-09-11 22:09:20,656 : INFO : PROGRESS: at 31.98% columns (148001 / 462807, 0.000263% density, 0.000363% projected density)\n", + "2018-09-11 22:09:20,731 : INFO : PROGRESS: at 32.20% columns (149001 / 462807, 0.000263% density, 0.000363% projected density)\n", + "2018-09-11 22:09:20,883 : INFO : PROGRESS: at 32.41% columns (150001 / 462807, 0.000264% density, 0.000364% projected density)\n", + "2018-09-11 22:09:20,956 : INFO : PROGRESS: at 32.63% columns (151001 / 462807, 0.000264% density, 0.000364% projected density)\n", + "2018-09-11 22:09:21,015 : 
INFO : PROGRESS: at 32.84% columns (152001 / 462807, 0.000264% density, 0.000363% projected density)\n", + "2018-09-11 22:09:21,150 : INFO : PROGRESS: at 33.06% columns (153001 / 462807, 0.000265% density, 0.000364% projected density)\n", + "2018-09-11 22:09:21,193 : INFO : PROGRESS: at 33.28% columns (154001 / 462807, 0.000265% density, 0.000364% projected density)\n", + "2018-09-11 22:09:21,313 : INFO : PROGRESS: at 33.49% columns (155001 / 462807, 0.000266% density, 0.000364% projected density)\n", + "2018-09-11 22:09:21,356 : INFO : PROGRESS: at 33.71% columns (156001 / 462807, 0.000266% density, 0.000364% projected density)\n", + "2018-09-11 22:09:21,451 : INFO : PROGRESS: at 33.92% columns (157001 / 462807, 0.000266% density, 0.000364% projected density)\n", + "2018-09-11 22:09:21,478 : INFO : PROGRESS: at 34.14% columns (158001 / 462807, 0.000266% density, 0.000363% projected density)\n", + "2018-09-11 22:09:21,520 : INFO : PROGRESS: at 34.36% columns (159001 / 462807, 0.000266% density, 0.000362% projected density)\n", + "2018-09-11 22:09:21,593 : INFO : PROGRESS: at 34.57% columns (160001 / 462807, 0.000266% density, 0.000362% projected density)\n", + "2018-09-11 22:09:21,636 : INFO : PROGRESS: at 34.79% columns (161001 / 462807, 0.000267% density, 0.000361% projected density)\n", + "2018-09-11 22:09:21,680 : INFO : PROGRESS: at 35.00% columns (162001 / 462807, 0.000267% density, 0.000361% projected density)\n", + "2018-09-11 22:09:21,754 : INFO : PROGRESS: at 35.22% columns (163001 / 462807, 0.000267% density, 0.000361% projected density)\n", + "2018-09-11 22:09:21,813 : INFO : PROGRESS: at 35.44% columns (164001 / 462807, 0.000267% density, 0.000360% projected density)\n", + "2018-09-11 22:09:21,919 : INFO : PROGRESS: at 35.65% columns (165001 / 462807, 0.000268% density, 0.000361% projected density)\n", + "2018-09-11 22:09:21,978 : INFO : PROGRESS: at 35.87% columns (166001 / 462807, 0.000268% density, 0.000360% projected density)\n", + "2018-09-11 22:09:22,035 : INFO : PROGRESS: at 36.08% columns (167001 / 462807, 0.000268% density, 0.000360% projected density)\n", + "2018-09-11 22:09:22,154 : INFO : PROGRESS: at 36.30% columns (168001 / 462807, 0.000268% density, 0.000360% projected density)\n", + "2018-09-11 22:09:22,244 : INFO : PROGRESS: at 36.52% columns (169001 / 462807, 0.000269% density, 0.000361% projected density)\n", + "2018-09-11 22:09:22,287 : INFO : PROGRESS: at 36.73% columns (170001 / 462807, 0.000269% density, 0.000360% projected density)\n", + "2018-09-11 22:09:22,361 : INFO : PROGRESS: at 36.95% columns (171001 / 462807, 0.000269% density, 0.000360% projected density)\n", + "2018-09-11 22:09:22,436 : INFO : PROGRESS: at 37.16% columns (172001 / 462807, 0.000269% density, 0.000360% projected density)\n", + "2018-09-11 22:09:22,540 : INFO : PROGRESS: at 37.38% columns (173001 / 462807, 0.000270% density, 0.000360% projected density)\n", + "2018-09-11 22:09:22,583 : INFO : PROGRESS: at 37.60% columns (174001 / 462807, 0.000270% density, 0.000360% projected density)\n", + "2018-09-11 22:09:22,627 : INFO : PROGRESS: at 37.81% columns (175001 / 462807, 0.000270% density, 0.000359% projected density)\n", + "2018-09-11 22:09:22,700 : INFO : PROGRESS: at 38.03% columns (176001 / 462807, 0.000270% density, 0.000359% projected density)\n", + "2018-09-11 22:09:22,819 : INFO : PROGRESS: at 38.25% columns (177001 / 462807, 0.000271% density, 0.000359% projected density)\n", + "2018-09-11 22:09:22,878 : INFO : PROGRESS: at 38.46% columns (178001 / 462807, 0.000271% 
density, 0.000359% projected density)\n", + "2018-09-11 22:09:22,922 : INFO : PROGRESS: at 38.68% columns (179001 / 462807, 0.000271% density, 0.000359% projected density)\n", + "2018-09-11 22:09:22,980 : INFO : PROGRESS: at 38.89% columns (180001 / 462807, 0.000271% density, 0.000358% projected density)\n", + "2018-09-11 22:09:23,054 : INFO : PROGRESS: at 39.11% columns (181001 / 462807, 0.000272% density, 0.000358% projected density)\n", + "2018-09-11 22:09:23,113 : INFO : PROGRESS: at 39.33% columns (182001 / 462807, 0.000272% density, 0.000358% projected density)\n", + "2018-09-11 22:09:23,203 : INFO : PROGRESS: at 39.54% columns (183001 / 462807, 0.000272% density, 0.000358% projected density)\n", + "2018-09-11 22:09:23,291 : INFO : PROGRESS: at 39.76% columns (184001 / 462807, 0.000273% density, 0.000358% projected density)\n", + "2018-09-11 22:09:23,364 : INFO : PROGRESS: at 39.97% columns (185001 / 462807, 0.000273% density, 0.000358% projected density)\n", + "2018-09-11 22:09:23,423 : INFO : PROGRESS: at 40.19% columns (186001 / 462807, 0.000273% density, 0.000358% projected density)\n", + "2018-09-11 22:09:23,496 : INFO : PROGRESS: at 40.41% columns (187001 / 462807, 0.000273% density, 0.000358% projected density)\n", + "2018-09-11 22:09:23,602 : INFO : PROGRESS: at 40.62% columns (188001 / 462807, 0.000274% density, 0.000358% projected density)\n", + "2018-09-11 22:09:23,691 : INFO : PROGRESS: at 40.84% columns (189001 / 462807, 0.000274% density, 0.000358% projected density)\n", + "2018-09-11 22:09:24,274 : INFO : PROGRESS: at 41.05% columns (190001 / 462807, 0.000277% density, 0.000365% projected density)\n", + "2018-09-11 22:09:24,759 : INFO : PROGRESS: at 41.27% columns (191001 / 462807, 0.000280% density, 0.000370% projected density)\n", + "2018-09-11 22:09:24,832 : INFO : PROGRESS: at 41.49% columns (192001 / 462807, 0.000280% density, 0.000370% projected density)\n", + "2018-09-11 22:09:24,910 : INFO : PROGRESS: at 41.70% columns (193001 / 462807, 0.000280% density, 0.000370% projected density)\n", + "2018-09-11 22:09:24,953 : INFO : PROGRESS: at 41.92% columns (194001 / 462807, 0.000280% density, 0.000369% projected density)\n", + "2018-09-11 22:09:25,009 : INFO : PROGRESS: at 42.13% columns (195001 / 462807, 0.000280% density, 0.000369% projected density)\n", + "2018-09-11 22:09:25,067 : INFO : PROGRESS: at 42.35% columns (196001 / 462807, 0.000281% density, 0.000368% projected density)\n", + "2018-09-11 22:09:25,186 : INFO : PROGRESS: at 42.57% columns (197001 / 462807, 0.000281% density, 0.000369% projected density)\n", + "2018-09-11 22:09:25,244 : INFO : PROGRESS: at 42.78% columns (198001 / 462807, 0.000281% density, 0.000369% projected density)\n", + "2018-09-11 22:09:25,271 : INFO : PROGRESS: at 43.00% columns (199001 / 462807, 0.000281% density, 0.000368% projected density)\n", + "2018-09-11 22:09:25,344 : INFO : PROGRESS: at 43.21% columns (200001 / 462807, 0.000282% density, 0.000368% projected density)\n", + "2018-09-11 22:09:25,464 : INFO : PROGRESS: at 43.43% columns (201001 / 462807, 0.000282% density, 0.000368% projected density)\n", + "2018-09-11 22:09:25,492 : INFO : PROGRESS: at 43.65% columns (202001 / 462807, 0.000282% density, 0.000367% projected density)\n", + "2018-09-11 22:09:25,547 : INFO : PROGRESS: at 43.86% columns (203001 / 462807, 0.000282% density, 0.000367% projected density)\n", + "2018-09-11 22:09:25,590 : INFO : PROGRESS: at 44.08% columns (204001 / 462807, 0.000282% density, 0.000366% projected density)\n", + "2018-09-11 22:09:25,633 : 
INFO : PROGRESS: at 44.30% columns (205001 / 462807, 0.000282% density, 0.000366% projected density)\n", + "2018-09-11 22:09:25,717 : INFO : PROGRESS: at 44.51% columns (206001 / 462807, 0.000283% density, 0.000366% projected density)\n", + "2018-09-11 22:09:25,775 : INFO : PROGRESS: at 44.73% columns (207001 / 462807, 0.000283% density, 0.000365% projected density)\n", + "2018-09-11 22:09:25,861 : INFO : PROGRESS: at 44.94% columns (208001 / 462807, 0.000283% density, 0.000365% projected density)\n", + "2018-09-11 22:09:25,903 : INFO : PROGRESS: at 45.16% columns (209001 / 462807, 0.000283% density, 0.000365% projected density)\n", + "2018-09-11 22:09:25,946 : INFO : PROGRESS: at 45.38% columns (210001 / 462807, 0.000283% density, 0.000364% projected density)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:09:25,990 : INFO : PROGRESS: at 45.59% columns (211001 / 462807, 0.000283% density, 0.000364% projected density)\n", + "2018-09-11 22:09:26,033 : INFO : PROGRESS: at 45.81% columns (212001 / 462807, 0.000284% density, 0.000363% projected density)\n", + "2018-09-11 22:09:26,075 : INFO : PROGRESS: at 46.02% columns (213001 / 462807, 0.000284% density, 0.000363% projected density)\n", + "2018-09-11 22:09:26,147 : INFO : PROGRESS: at 46.24% columns (214001 / 462807, 0.000284% density, 0.000363% projected density)\n", + "2018-09-11 22:09:26,236 : INFO : PROGRESS: at 46.46% columns (215001 / 462807, 0.000284% density, 0.000363% projected density)\n", + "2018-09-11 22:09:26,292 : INFO : PROGRESS: at 46.67% columns (216001 / 462807, 0.000284% density, 0.000362% projected density)\n", + "2018-09-11 22:09:26,422 : INFO : PROGRESS: at 46.89% columns (217001 / 462807, 0.000285% density, 0.000363% projected density)\n", + "2018-09-11 22:09:26,465 : INFO : PROGRESS: at 47.10% columns (218001 / 462807, 0.000285% density, 0.000363% projected density)\n", + "2018-09-11 22:09:26,493 : INFO : PROGRESS: at 47.32% columns (219001 / 462807, 0.000285% density, 0.000362% projected density)\n", + "2018-09-11 22:09:26,628 : INFO : PROGRESS: at 47.54% columns (220001 / 462807, 0.000286% density, 0.000362% projected density)\n", + "2018-09-11 22:09:27,015 : INFO : PROGRESS: at 47.75% columns (221001 / 462807, 0.000288% density, 0.000366% projected density)\n", + "2018-09-11 22:09:27,114 : INFO : PROGRESS: at 47.97% columns (222001 / 462807, 0.000288% density, 0.000366% projected density)\n", + "2018-09-11 22:09:27,942 : INFO : PROGRESS: at 48.18% columns (223001 / 462807, 0.000292% density, 0.000375% projected density)\n", + "2018-09-11 22:09:28,081 : INFO : PROGRESS: at 48.40% columns (224001 / 462807, 0.000293% density, 0.000375% projected density)\n", + "2018-09-11 22:09:28,168 : INFO : PROGRESS: at 48.62% columns (225001 / 462807, 0.000293% density, 0.000375% projected density)\n", + "2018-09-11 22:09:28,212 : INFO : PROGRESS: at 48.83% columns (226001 / 462807, 0.000293% density, 0.000375% projected density)\n", + "2018-09-11 22:09:28,301 : INFO : PROGRESS: at 49.05% columns (227001 / 462807, 0.000294% density, 0.000375% projected density)\n", + "2018-09-11 22:09:28,344 : INFO : PROGRESS: at 49.26% columns (228001 / 462807, 0.000294% density, 0.000374% projected density)\n", + "2018-09-11 22:09:28,402 : INFO : PROGRESS: at 49.48% columns (229001 / 462807, 0.000294% density, 0.000374% projected density)\n", + "2018-09-11 22:09:28,460 : INFO : PROGRESS: at 49.70% columns (230001 / 462807, 0.000294% density, 0.000373% projected density)\n", + "2018-09-11 22:09:28,534 : 
INFO : PROGRESS: at 49.91% columns (231001 / 462807, 0.000295% density, 0.000373% projected density)\n", + "2018-09-11 22:09:28,622 : INFO : PROGRESS: at 50.13% columns (232001 / 462807, 0.000295% density, 0.000373% projected density)\n", + "2018-09-11 22:09:28,682 : INFO : PROGRESS: at 50.35% columns (233001 / 462807, 0.000295% density, 0.000373% projected density)\n", + "2018-09-11 22:09:28,793 : INFO : PROGRESS: at 50.56% columns (234001 / 462807, 0.000296% density, 0.000373% projected density)\n", + "2018-09-11 22:09:28,866 : INFO : PROGRESS: at 50.78% columns (235001 / 462807, 0.000296% density, 0.000373% projected density)\n", + "2018-09-11 22:09:28,909 : INFO : PROGRESS: at 50.99% columns (236001 / 462807, 0.000296% density, 0.000373% projected density)\n", + "2018-09-11 22:09:28,952 : INFO : PROGRESS: at 51.21% columns (237001 / 462807, 0.000296% density, 0.000372% projected density)\n", + "2018-09-11 22:09:29,013 : INFO : PROGRESS: at 51.43% columns (238001 / 462807, 0.000296% density, 0.000372% projected density)\n", + "2018-09-11 22:09:29,040 : INFO : PROGRESS: at 51.64% columns (239001 / 462807, 0.000296% density, 0.000371% projected density)\n", + "2018-09-11 22:09:29,082 : INFO : PROGRESS: at 51.86% columns (240001 / 462807, 0.000296% density, 0.000371% projected density)\n", + "2018-09-11 22:09:29,201 : INFO : PROGRESS: at 52.07% columns (241001 / 462807, 0.000297% density, 0.000371% projected density)\n", + "2018-09-11 22:09:29,271 : INFO : PROGRESS: at 52.29% columns (242001 / 462807, 0.000297% density, 0.000371% projected density)\n", + "2018-09-11 22:09:29,323 : INFO : PROGRESS: at 52.51% columns (243001 / 462807, 0.000297% density, 0.000370% projected density)\n", + "2018-09-11 22:09:29,408 : INFO : PROGRESS: at 52.72% columns (244001 / 462807, 0.000297% density, 0.000370% projected density)\n", + "2018-09-11 22:09:29,467 : INFO : PROGRESS: at 52.94% columns (245001 / 462807, 0.000298% density, 0.000370% projected density)\n", + "2018-09-11 22:09:29,694 : INFO : PROGRESS: at 53.15% columns (246001 / 462807, 0.000299% density, 0.000371% projected density)\n", + "2018-09-11 22:09:29,754 : INFO : PROGRESS: at 53.37% columns (247001 / 462807, 0.000299% density, 0.000371% projected density)\n", + "2018-09-11 22:09:29,813 : INFO : PROGRESS: at 53.59% columns (248001 / 462807, 0.000299% density, 0.000371% projected density)\n", + "2018-09-11 22:09:29,855 : INFO : PROGRESS: at 53.80% columns (249001 / 462807, 0.000299% density, 0.000370% projected density)\n", + "2018-09-11 22:09:30,044 : INFO : PROGRESS: at 54.02% columns (250001 / 462807, 0.000300% density, 0.000371% projected density)\n", + "2018-09-11 22:09:30,119 : INFO : PROGRESS: at 54.23% columns (251001 / 462807, 0.000300% density, 0.000371% projected density)\n", + "2018-09-11 22:09:30,162 : INFO : PROGRESS: at 54.45% columns (252001 / 462807, 0.000300% density, 0.000371% projected density)\n", + "2018-09-11 22:09:30,205 : INFO : PROGRESS: at 54.67% columns (253001 / 462807, 0.000300% density, 0.000370% projected density)\n", + "2018-09-11 22:09:30,292 : INFO : PROGRESS: at 54.88% columns (254001 / 462807, 0.000301% density, 0.000370% projected density)\n", + "2018-09-11 22:09:30,651 : INFO : PROGRESS: at 55.10% columns (255001 / 462807, 0.000302% density, 0.000373% projected density)\n", + "2018-09-11 22:09:30,719 : INFO : PROGRESS: at 55.31% columns (256001 / 462807, 0.000303% density, 0.000373% projected density)\n", + "2018-09-11 22:09:30,747 : INFO : PROGRESS: at 55.53% columns (257001 / 462807, 0.000303% 
density, 0.000372% projected density)\n", + "2018-09-11 22:09:30,817 : INFO : PROGRESS: at 55.75% columns (258001 / 462807, 0.000303% density, 0.000372% projected density)\n", + "2018-09-11 22:09:30,915 : INFO : PROGRESS: at 55.96% columns (259001 / 462807, 0.000303% density, 0.000372% projected density)\n", + "2018-09-11 22:09:30,955 : INFO : PROGRESS: at 56.18% columns (260001 / 462807, 0.000303% density, 0.000371% projected density)\n", + "2018-09-11 22:09:31,070 : INFO : PROGRESS: at 56.40% columns (261001 / 462807, 0.000304% density, 0.000372% projected density)\n", + "2018-09-11 22:09:31,332 : INFO : PROGRESS: at 56.61% columns (262001 / 462807, 0.000305% density, 0.000373% projected density)\n", + "2018-09-11 22:09:31,629 : INFO : PROGRESS: at 56.83% columns (263001 / 462807, 0.000307% density, 0.000375% projected density)\n", + "2018-09-11 22:09:31,913 : INFO : PROGRESS: at 57.04% columns (264001 / 462807, 0.000308% density, 0.000377% projected density)\n", + "2018-09-11 22:09:32,218 : INFO : PROGRESS: at 57.26% columns (265001 / 462807, 0.000310% density, 0.000379% projected density)\n", + "2018-09-11 22:09:32,405 : INFO : PROGRESS: at 57.48% columns (266001 / 462807, 0.000310% density, 0.000380% projected density)\n", + "2018-09-11 22:09:32,711 : INFO : PROGRESS: at 57.69% columns (267001 / 462807, 0.000312% density, 0.000382% projected density)\n", + "2018-09-11 22:09:33,010 : INFO : PROGRESS: at 57.91% columns (268001 / 462807, 0.000313% density, 0.000384% projected density)\n", + "2018-09-11 22:09:33,169 : INFO : PROGRESS: at 58.12% columns (269001 / 462807, 0.000314% density, 0.000385% projected density)\n", + "2018-09-11 22:09:33,516 : INFO : PROGRESS: at 58.34% columns (270001 / 462807, 0.000316% density, 0.000387% projected density)\n", + "2018-09-11 22:09:33,661 : INFO : PROGRESS: at 58.56% columns (271001 / 462807, 0.000316% density, 0.000388% projected density)\n", + "2018-09-11 22:09:33,902 : INFO : PROGRESS: at 58.77% columns (272001 / 462807, 0.000318% density, 0.000389% projected density)\n", + "2018-09-11 22:09:34,161 : INFO : PROGRESS: at 58.99% columns (273001 / 462807, 0.000319% density, 0.000390% projected density)\n", + "2018-09-11 22:09:34,368 : INFO : PROGRESS: at 59.20% columns (274001 / 462807, 0.000320% density, 0.000391% projected density)\n", + "2018-09-11 22:09:34,563 : INFO : PROGRESS: at 59.42% columns (275001 / 462807, 0.000321% density, 0.000392% projected density)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:09:34,785 : INFO : PROGRESS: at 59.64% columns (276001 / 462807, 0.000322% density, 0.000393% projected density)\n", + "2018-09-11 22:09:34,977 : INFO : PROGRESS: at 59.85% columns (277001 / 462807, 0.000323% density, 0.000394% projected density)\n", + "2018-09-11 22:09:35,220 : INFO : PROGRESS: at 60.07% columns (278001 / 462807, 0.000324% density, 0.000395% projected density)\n", + "2018-09-11 22:09:35,501 : INFO : PROGRESS: at 60.28% columns (279001 / 462807, 0.000325% density, 0.000397% projected density)\n", + "2018-09-11 22:09:35,960 : INFO : PROGRESS: at 60.50% columns (280001 / 462807, 0.000327% density, 0.000400% projected density)\n", + "2018-09-11 22:09:36,200 : INFO : PROGRESS: at 60.72% columns (281001 / 462807, 0.000328% density, 0.000401% projected density)\n", + "2018-09-11 22:09:36,493 : INFO : PROGRESS: at 60.93% columns (282001 / 462807, 0.000330% density, 0.000403% projected density)\n", + "2018-09-11 22:09:36,720 : INFO : PROGRESS: at 61.15% columns (283001 / 462807, 0.000331% 
density, 0.000404% projected density)\n", + "2018-09-11 22:09:36,995 : INFO : PROGRESS: at 61.36% columns (284001 / 462807, 0.000332% density, 0.000405% projected density)\n", + "2018-09-11 22:09:37,379 : INFO : PROGRESS: at 61.58% columns (285001 / 462807, 0.000334% density, 0.000407% projected density)\n", + "2018-09-11 22:09:37,601 : INFO : PROGRESS: at 61.80% columns (286001 / 462807, 0.000335% density, 0.000408% projected density)\n", + "2018-09-11 22:09:37,813 : INFO : PROGRESS: at 62.01% columns (287001 / 462807, 0.000336% density, 0.000409% projected density)\n", + "2018-09-11 22:09:37,972 : INFO : PROGRESS: at 62.23% columns (288001 / 462807, 0.000337% density, 0.000410% projected density)\n", + "2018-09-11 22:09:38,269 : INFO : PROGRESS: at 62.45% columns (289001 / 462807, 0.000338% density, 0.000411% projected density)\n", + "2018-09-11 22:09:38,437 : INFO : PROGRESS: at 62.66% columns (290001 / 462807, 0.000339% density, 0.000412% projected density)\n", + "2018-09-11 22:09:38,698 : INFO : PROGRESS: at 62.88% columns (291001 / 462807, 0.000340% density, 0.000413% projected density)\n", + "2018-09-11 22:09:38,889 : INFO : PROGRESS: at 63.09% columns (292001 / 462807, 0.000341% density, 0.000414% projected density)\n", + "2018-09-11 22:09:39,164 : INFO : PROGRESS: at 63.31% columns (293001 / 462807, 0.000342% density, 0.000415% projected density)\n", + "2018-09-11 22:09:39,468 : INFO : PROGRESS: at 63.53% columns (294001 / 462807, 0.000344% density, 0.000417% projected density)\n", + "2018-09-11 22:09:39,685 : INFO : PROGRESS: at 63.74% columns (295001 / 462807, 0.000345% density, 0.000418% projected density)\n", + "2018-09-11 22:09:40,037 : INFO : PROGRESS: at 63.96% columns (296001 / 462807, 0.000346% density, 0.000420% projected density)\n", + "2018-09-11 22:09:40,337 : INFO : PROGRESS: at 64.17% columns (297001 / 462807, 0.000348% density, 0.000421% projected density)\n", + "2018-09-11 22:09:40,532 : INFO : PROGRESS: at 64.39% columns (298001 / 462807, 0.000349% density, 0.000422% projected density)\n", + "2018-09-11 22:09:41,057 : INFO : PROGRESS: at 64.61% columns (299001 / 462807, 0.000351% density, 0.000425% projected density)\n", + "2018-09-11 22:09:41,296 : INFO : PROGRESS: at 64.82% columns (300001 / 462807, 0.000352% density, 0.000426% projected density)\n", + "2018-09-11 22:09:41,528 : INFO : PROGRESS: at 65.04% columns (301001 / 462807, 0.000353% density, 0.000427% projected density)\n", + "2018-09-11 22:09:41,814 : INFO : PROGRESS: at 65.25% columns (302001 / 462807, 0.000355% density, 0.000429% projected density)\n", + "2018-09-11 22:09:42,186 : INFO : PROGRESS: at 65.47% columns (303001 / 462807, 0.000357% density, 0.000431% projected density)\n", + "2018-09-11 22:09:42,405 : INFO : PROGRESS: at 65.69% columns (304001 / 462807, 0.000358% density, 0.000431% projected density)\n", + "2018-09-11 22:09:42,646 : INFO : PROGRESS: at 65.90% columns (305001 / 462807, 0.000359% density, 0.000432% projected density)\n", + "2018-09-11 22:09:42,840 : INFO : PROGRESS: at 66.12% columns (306001 / 462807, 0.000360% density, 0.000433% projected density)\n", + "2018-09-11 22:09:43,050 : INFO : PROGRESS: at 66.33% columns (307001 / 462807, 0.000361% density, 0.000434% projected density)\n", + "2018-09-11 22:09:43,234 : INFO : PROGRESS: at 66.55% columns (308001 / 462807, 0.000361% density, 0.000434% projected density)\n", + "2018-09-11 22:09:43,426 : INFO : PROGRESS: at 66.77% columns (309001 / 462807, 0.000362% density, 0.000435% projected density)\n", + "2018-09-11 22:09:43,566 : 
INFO : PROGRESS: at 66.98% columns (310001 / 462807, 0.000363% density, 0.000435% projected density)\n", + "2018-09-11 22:09:43,721 : INFO : PROGRESS: at 67.20% columns (311001 / 462807, 0.000363% density, 0.000435% projected density)\n", + "2018-09-11 22:09:43,890 : INFO : PROGRESS: at 67.41% columns (312001 / 462807, 0.000364% density, 0.000436% projected density)\n", + "2018-09-11 22:09:44,168 : INFO : PROGRESS: at 67.63% columns (313001 / 462807, 0.000365% density, 0.000437% projected density)\n", + "2018-09-11 22:09:44,385 : INFO : PROGRESS: at 67.85% columns (314001 / 462807, 0.000367% density, 0.000438% projected density)\n", + "2018-09-11 22:09:44,729 : INFO : PROGRESS: at 68.06% columns (315001 / 462807, 0.000368% density, 0.000440% projected density)\n", + "2018-09-11 22:09:45,155 : INFO : PROGRESS: at 68.28% columns (316001 / 462807, 0.000370% density, 0.000441% projected density)\n", + "2018-09-11 22:09:45,688 : INFO : PROGRESS: at 68.50% columns (317001 / 462807, 0.000373% density, 0.000445% projected density)\n", + "2018-09-11 22:09:45,885 : INFO : PROGRESS: at 68.71% columns (318001 / 462807, 0.000374% density, 0.000445% projected density)\n", + "2018-09-11 22:09:46,259 : INFO : PROGRESS: at 68.93% columns (319001 / 462807, 0.000375% density, 0.000447% projected density)\n", + "2018-09-11 22:09:46,802 : INFO : PROGRESS: at 69.14% columns (320001 / 462807, 0.000378% density, 0.000450% projected density)\n", + "2018-09-11 22:09:47,065 : INFO : PROGRESS: at 69.36% columns (321001 / 462807, 0.000379% density, 0.000451% projected density)\n", + "2018-09-11 22:09:47,364 : INFO : PROGRESS: at 69.58% columns (322001 / 462807, 0.000380% density, 0.000452% projected density)\n", + "2018-09-11 22:09:47,946 : INFO : PROGRESS: at 69.79% columns (323001 / 462807, 0.000383% density, 0.000455% projected density)\n", + "2018-09-11 22:09:48,615 : INFO : PROGRESS: at 70.01% columns (324001 / 462807, 0.000386% density, 0.000459% projected density)\n", + "2018-09-11 22:09:49,378 : INFO : PROGRESS: at 70.22% columns (325001 / 462807, 0.000390% density, 0.000464% projected density)\n", + "2018-09-11 22:09:49,943 : INFO : PROGRESS: at 70.44% columns (326001 / 462807, 0.000393% density, 0.000467% projected density)\n", + "2018-09-11 22:09:50,810 : INFO : PROGRESS: at 70.66% columns (327001 / 462807, 0.000397% density, 0.000472% projected density)\n", + "2018-09-11 22:09:51,637 : INFO : PROGRESS: at 70.87% columns (328001 / 462807, 0.000401% density, 0.000477% projected density)\n", + "2018-09-11 22:09:52,334 : INFO : PROGRESS: at 71.09% columns (329001 / 462807, 0.000404% density, 0.000481% projected density)\n", + "2018-09-11 22:09:52,914 : INFO : PROGRESS: at 71.30% columns (330001 / 462807, 0.000407% density, 0.000483% projected density)\n", + "2018-09-11 22:09:53,663 : INFO : PROGRESS: at 71.52% columns (331001 / 462807, 0.000410% density, 0.000487% projected density)\n", + "2018-09-11 22:09:54,239 : INFO : PROGRESS: at 71.74% columns (332001 / 462807, 0.000413% density, 0.000490% projected density)\n", + "2018-09-11 22:09:54,896 : INFO : PROGRESS: at 71.95% columns (333001 / 462807, 0.000416% density, 0.000494% projected density)\n", + "2018-09-11 22:09:55,839 : INFO : PROGRESS: at 72.17% columns (334001 / 462807, 0.000421% density, 0.000500% projected density)\n", + "2018-09-11 22:09:56,442 : INFO : PROGRESS: at 72.38% columns (335001 / 462807, 0.000424% density, 0.000503% projected density)\n", + "2018-09-11 22:09:57,393 : INFO : PROGRESS: at 72.60% columns (336001 / 462807, 0.000428% 
density, 0.000508% projected density)\n", + "2018-09-11 22:09:58,254 : INFO : PROGRESS: at 72.82% columns (337001 / 462807, 0.000432% density, 0.000513% projected density)\n", + "2018-09-11 22:09:58,984 : INFO : PROGRESS: at 73.03% columns (338001 / 462807, 0.000436% density, 0.000517% projected density)\n", + "2018-09-11 22:09:59,760 : INFO : PROGRESS: at 73.25% columns (339001 / 462807, 0.000439% density, 0.000521% projected density)\n", + "2018-09-11 22:10:00,573 : INFO : PROGRESS: at 73.46% columns (340001 / 462807, 0.000443% density, 0.000526% projected density)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:10:01,510 : INFO : PROGRESS: at 73.68% columns (341001 / 462807, 0.000448% density, 0.000531% projected density)\n", + "2018-09-11 22:10:02,327 : INFO : PROGRESS: at 73.90% columns (342001 / 462807, 0.000452% density, 0.000535% projected density)\n", + "2018-09-11 22:10:03,267 : INFO : PROGRESS: at 74.11% columns (343001 / 462807, 0.000456% density, 0.000540% projected density)\n", + "2018-09-11 22:10:03,968 : INFO : PROGRESS: at 74.33% columns (344001 / 462807, 0.000459% density, 0.000543% projected density)\n", + "2018-09-11 22:10:04,741 : INFO : PROGRESS: at 74.55% columns (345001 / 462807, 0.000463% density, 0.000547% projected density)\n", + "2018-09-11 22:10:05,494 : INFO : PROGRESS: at 74.76% columns (346001 / 462807, 0.000466% density, 0.000551% projected density)\n", + "2018-09-11 22:10:06,487 : INFO : PROGRESS: at 74.98% columns (347001 / 462807, 0.000471% density, 0.000556% projected density)\n", + "2018-09-11 22:10:07,188 : INFO : PROGRESS: at 75.19% columns (348001 / 462807, 0.000474% density, 0.000559% projected density)\n", + "2018-09-11 22:10:08,083 : INFO : PROGRESS: at 75.41% columns (349001 / 462807, 0.000478% density, 0.000564% projected density)\n", + "2018-09-11 22:10:10,053 : INFO : PROGRESS: at 75.63% columns (350001 / 462807, 0.000487% density, 0.000574% projected density)\n", + "2018-09-11 22:10:12,327 : INFO : PROGRESS: at 75.84% columns (351001 / 462807, 0.000498% density, 0.000587% projected density)\n", + "2018-09-11 22:10:14,228 : INFO : PROGRESS: at 76.06% columns (352001 / 462807, 0.000506% density, 0.000598% projected density)\n", + "2018-09-11 22:10:16,459 : INFO : PROGRESS: at 76.27% columns (353001 / 462807, 0.000516% density, 0.000610% projected density)\n", + "2018-09-11 22:10:18,614 : INFO : PROGRESS: at 76.49% columns (354001 / 462807, 0.000526% density, 0.000621% projected density)\n", + "2018-09-11 22:10:20,775 : INFO : PROGRESS: at 76.71% columns (355001 / 462807, 0.000535% density, 0.000632% projected density)\n", + "2018-09-11 22:10:23,183 : INFO : PROGRESS: at 76.92% columns (356001 / 462807, 0.000547% density, 0.000646% projected density)\n", + "2018-09-11 22:10:25,411 : INFO : PROGRESS: at 77.14% columns (357001 / 462807, 0.000557% density, 0.000658% projected density)\n", + "2018-09-11 22:10:27,474 : INFO : PROGRESS: at 77.35% columns (358001 / 462807, 0.000566% density, 0.000668% projected density)\n", + "2018-09-11 22:10:29,703 : INFO : PROGRESS: at 77.57% columns (359001 / 462807, 0.000576% density, 0.000680% projected density)\n", + "2018-09-11 22:10:32,122 : INFO : PROGRESS: at 77.79% columns (360001 / 462807, 0.000587% density, 0.000692% projected density)\n", + "2018-09-11 22:10:34,269 : INFO : PROGRESS: at 78.00% columns (361001 / 462807, 0.000596% density, 0.000703% projected density)\n", + "2018-09-11 22:10:36,330 : INFO : PROGRESS: at 78.22% columns (362001 / 462807, 0.000605% 
density, 0.000713% projected density)\n", + "2018-09-11 22:10:38,492 : INFO : PROGRESS: at 78.43% columns (363001 / 462807, 0.000614% density, 0.000724% projected density)\n", + "2018-09-11 22:10:40,936 : INFO : PROGRESS: at 78.65% columns (364001 / 462807, 0.000625% density, 0.000736% projected density)\n", + "2018-09-11 22:10:43,223 : INFO : PROGRESS: at 78.87% columns (365001 / 462807, 0.000635% density, 0.000747% projected density)\n", + "2018-09-11 22:10:52,364 : INFO : PROGRESS: at 79.08% columns (366001 / 462807, 0.000673% density, 0.000793% projected density)\n", + "2018-09-11 22:11:02,939 : INFO : PROGRESS: at 79.30% columns (367001 / 462807, 0.000715% density, 0.000845% projected density)\n", + "2018-09-11 22:11:13,584 : INFO : PROGRESS: at 79.52% columns (368001 / 462807, 0.000759% density, 0.000898% projected density)\n", + "2018-09-11 22:11:24,075 : INFO : PROGRESS: at 79.73% columns (369001 / 462807, 0.000801% density, 0.000950% projected density)\n", + "2018-09-11 22:11:34,319 : INFO : PROGRESS: at 79.95% columns (370001 / 462807, 0.000841% density, 0.000998% projected density)\n", + "2018-09-11 22:11:44,595 : INFO : PROGRESS: at 80.16% columns (371001 / 462807, 0.000880% density, 0.001045% projected density)\n", + "2018-09-11 22:11:54,738 : INFO : PROGRESS: at 80.38% columns (372001 / 462807, 0.000918% density, 0.001090% projected density)\n", + "2018-09-11 22:12:04,739 : INFO : PROGRESS: at 80.60% columns (373001 / 462807, 0.000956% density, 0.001134% projected density)\n", + "2018-09-11 22:12:14,662 : INFO : PROGRESS: at 80.81% columns (374001 / 462807, 0.000993% density, 0.001177% projected density)\n", + "2018-09-11 22:12:24,550 : INFO : PROGRESS: at 81.03% columns (375001 / 462807, 0.001029% density, 0.001219% projected density)\n", + "2018-09-11 22:12:34,046 : INFO : PROGRESS: at 81.24% columns (376001 / 462807, 0.001062% density, 0.001257% projected density)\n", + "2018-09-11 22:12:43,658 : INFO : PROGRESS: at 81.46% columns (377001 / 462807, 0.001095% density, 0.001295% projected density)\n", + "2018-09-11 22:12:53,184 : INFO : PROGRESS: at 81.68% columns (378001 / 462807, 0.001128% density, 0.001333% projected density)\n", + "2018-09-11 22:13:02,553 : INFO : PROGRESS: at 81.89% columns (379001 / 462807, 0.001160% density, 0.001369% projected density)\n", + "2018-09-11 22:13:11,918 : INFO : PROGRESS: at 82.11% columns (380001 / 462807, 0.001192% density, 0.001404% projected density)\n", + "2018-09-11 22:13:21,246 : INFO : PROGRESS: at 82.32% columns (381001 / 462807, 0.001223% density, 0.001439% projected density)\n", + "2018-09-11 22:13:30,856 : INFO : PROGRESS: at 82.54% columns (382001 / 462807, 0.001257% density, 0.001477% projected density)\n", + "2018-09-11 22:13:40,176 : INFO : PROGRESS: at 82.76% columns (383001 / 462807, 0.001289% density, 0.001512% projected density)\n", + "2018-09-11 22:13:49,328 : INFO : PROGRESS: at 82.97% columns (384001 / 462807, 0.001317% density, 0.001543% projected density)\n", + "2018-09-11 22:13:58,203 : INFO : PROGRESS: at 83.19% columns (385001 / 462807, 0.001345% density, 0.001573% projected density)\n", + "2018-09-11 22:14:07,296 : INFO : PROGRESS: at 83.40% columns (386001 / 462807, 0.001374% density, 0.001605% projected density)\n", + "2018-09-11 22:14:16,559 : INFO : PROGRESS: at 83.62% columns (387001 / 462807, 0.001406% density, 0.001639% projected density)\n", + "2018-09-11 22:14:25,548 : INFO : PROGRESS: at 83.84% columns (388001 / 462807, 0.001435% density, 0.001670% projected density)\n", + "2018-09-11 22:14:34,632 : 
INFO : PROGRESS: at 84.05% columns (389001 / 462807, 0.001464% density, 0.001700% projected density)\n", + "2018-09-11 22:14:43,637 : INFO : PROGRESS: at 84.27% columns (390001 / 462807, 0.001492% density, 0.001731% projected density)\n", + "2018-09-11 22:14:52,551 : INFO : PROGRESS: at 84.48% columns (391001 / 462807, 0.001520% density, 0.001760% projected density)\n", + "2018-09-11 22:15:01,306 : INFO : PROGRESS: at 84.70% columns (392001 / 462807, 0.001548% density, 0.001788% projected density)\n", + "2018-09-11 22:15:10,440 : INFO : PROGRESS: at 84.92% columns (393001 / 462807, 0.001575% density, 0.001817% projected density)\n", + "2018-09-11 22:15:19,373 : INFO : PROGRESS: at 85.13% columns (394001 / 462807, 0.001604% density, 0.001847% projected density)\n", + "2018-09-11 22:15:28,212 : INFO : PROGRESS: at 85.35% columns (395001 / 462807, 0.001632% density, 0.001875% projected density)\n", + "2018-09-11 22:15:36,979 : INFO : PROGRESS: at 85.57% columns (396001 / 462807, 0.001658% density, 0.001901% projected density)\n", + "2018-09-11 22:15:45,887 : INFO : PROGRESS: at 85.78% columns (397001 / 462807, 0.001686% density, 0.001929% projected density)\n", + "2018-09-11 22:15:54,616 : INFO : PROGRESS: at 86.00% columns (398001 / 462807, 0.001711% density, 0.001955% projected density)\n", + "2018-09-11 22:16:03,469 : INFO : PROGRESS: at 86.21% columns (399001 / 462807, 0.001739% density, 0.001982% projected density)\n", + "2018-09-11 22:16:12,424 : INFO : PROGRESS: at 86.43% columns (400001 / 462807, 0.001767% density, 0.002011% projected density)\n", + "2018-09-11 22:16:21,245 : INFO : PROGRESS: at 86.65% columns (401001 / 462807, 0.001794% density, 0.002037% projected density)\n", + "2018-09-11 22:16:29,841 : INFO : PROGRESS: at 86.86% columns (402001 / 462807, 0.001819% density, 0.002061% projected density)\n", + "2018-09-11 22:16:38,698 : INFO : PROGRESS: at 87.08% columns (403001 / 462807, 0.001846% density, 0.002088% projected density)\n", + "2018-09-11 22:16:47,365 : INFO : PROGRESS: at 87.29% columns (404001 / 462807, 0.001872% density, 0.002113% projected density)\n", + "2018-09-11 22:16:56,252 : INFO : PROGRESS: at 87.51% columns (405001 / 462807, 0.001900% density, 0.002141% projected density)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:17:04,903 : INFO : PROGRESS: at 87.73% columns (406001 / 462807, 0.001926% density, 0.002166% projected density)\n", + "2018-09-11 22:17:13,755 : INFO : PROGRESS: at 87.94% columns (407001 / 462807, 0.001954% density, 0.002193% projected density)\n", + "2018-09-11 22:17:22,668 : INFO : PROGRESS: at 88.16% columns (408001 / 462807, 0.001983% density, 0.002220% projected density)\n", + "2018-09-11 22:17:31,544 : INFO : PROGRESS: at 88.37% columns (409001 / 462807, 0.002011% density, 0.002247% projected density)\n", + "2018-09-11 22:17:40,258 : INFO : PROGRESS: at 88.59% columns (410001 / 462807, 0.002037% density, 0.002271% projected density)\n", + "2018-09-11 22:17:49,178 : INFO : PROGRESS: at 88.81% columns (411001 / 462807, 0.002065% density, 0.002298% projected density)\n", + "2018-09-11 22:17:57,836 : INFO : PROGRESS: at 89.02% columns (412001 / 462807, 0.002090% density, 0.002321% projected density)\n", + "2018-09-11 22:18:06,604 : INFO : PROGRESS: at 89.24% columns (413001 / 462807, 0.002117% density, 0.002346% projected density)\n", + "2018-09-11 22:18:15,764 : INFO : PROGRESS: at 89.45% columns (414001 / 462807, 0.002145% density, 0.002373% projected density)\n", + "2018-09-11 22:18:24,728 : 
INFO : PROGRESS: at 89.67% columns (415001 / 462807, 0.002174% density, 0.002399% projected density)\n", + "2018-09-11 22:18:33,659 : INFO : PROGRESS: at 89.89% columns (416001 / 462807, 0.002202% density, 0.002426% projected density)\n", + "2018-09-11 22:18:42,538 : INFO : PROGRESS: at 90.10% columns (417001 / 462807, 0.002230% density, 0.002451% projected density)\n", + "2018-09-11 22:18:51,328 : INFO : PROGRESS: at 90.32% columns (418001 / 462807, 0.002257% density, 0.002476% projected density)\n", + "2018-09-11 22:19:00,361 : INFO : PROGRESS: at 90.53% columns (419001 / 462807, 0.002287% density, 0.002503% projected density)\n", + "2018-09-11 22:19:09,335 : INFO : PROGRESS: at 90.75% columns (420001 / 462807, 0.002316% density, 0.002530% projected density)\n", + "2018-09-11 22:19:18,181 : INFO : PROGRESS: at 90.97% columns (421001 / 462807, 0.002344% density, 0.002555% projected density)\n", + "2018-09-11 22:19:27,104 : INFO : PROGRESS: at 91.18% columns (422001 / 462807, 0.002372% density, 0.002581% projected density)\n", + "2018-09-11 22:19:36,312 : INFO : PROGRESS: at 91.40% columns (423001 / 462807, 0.002403% density, 0.002609% projected density)\n", + "2018-09-11 22:19:45,367 : INFO : PROGRESS: at 91.62% columns (424001 / 462807, 0.002433% density, 0.002636% projected density)\n", + "2018-09-11 22:19:54,643 : INFO : PROGRESS: at 91.83% columns (425001 / 462807, 0.002464% density, 0.002664% projected density)\n", + "2018-09-11 22:20:03,890 : INFO : PROGRESS: at 92.05% columns (426001 / 462807, 0.002496% density, 0.002693% projected density)\n", + "2018-09-11 22:20:13,123 : INFO : PROGRESS: at 92.26% columns (427001 / 462807, 0.002527% density, 0.002721% projected density)\n", + "2018-09-11 22:20:22,307 : INFO : PROGRESS: at 92.48% columns (428001 / 462807, 0.002559% density, 0.002749% projected density)\n", + "2018-09-11 22:20:31,523 : INFO : PROGRESS: at 92.70% columns (429001 / 462807, 0.002591% density, 0.002778% projected density)\n", + "2018-09-11 22:20:41,165 : INFO : PROGRESS: at 92.91% columns (430001 / 462807, 0.002623% density, 0.002807% projected density)\n", + "2018-09-11 22:20:50,574 : INFO : PROGRESS: at 93.13% columns (431001 / 462807, 0.002657% density, 0.002837% projected density)\n", + "2018-09-11 22:20:59,920 : INFO : PROGRESS: at 93.34% columns (432001 / 462807, 0.002690% density, 0.002867% projected density)\n", + "2018-09-11 22:21:09,408 : INFO : PROGRESS: at 93.56% columns (433001 / 462807, 0.002725% density, 0.002897% projected density)\n", + "2018-09-11 22:21:18,770 : INFO : PROGRESS: at 93.78% columns (434001 / 462807, 0.002758% density, 0.002927% projected density)\n", + "2018-09-11 22:21:27,940 : INFO : PROGRESS: at 93.99% columns (435001 / 462807, 0.002791% density, 0.002955% projected density)\n", + "2018-09-11 22:21:37,476 : INFO : PROGRESS: at 94.21% columns (436001 / 462807, 0.002826% density, 0.002986% projected density)\n", + "2018-09-11 22:21:46,800 : INFO : PROGRESS: at 94.42% columns (437001 / 462807, 0.002860% density, 0.003016% projected density)\n", + "2018-09-11 22:21:56,171 : INFO : PROGRESS: at 94.64% columns (438001 / 462807, 0.002894% density, 0.003045% projected density)\n", + "2018-09-11 22:22:05,479 : INFO : PROGRESS: at 94.86% columns (439001 / 462807, 0.002928% density, 0.003075% projected density)\n", + "2018-09-11 22:22:15,007 : INFO : PROGRESS: at 95.07% columns (440001 / 462807, 0.002964% density, 0.003106% projected density)\n", + "2018-09-11 22:22:24,355 : INFO : PROGRESS: at 95.29% columns (441001 / 462807, 0.002998% 
density, 0.003136% projected density)\n", + "2018-09-11 22:22:33,504 : INFO : PROGRESS: at 95.50% columns (442001 / 462807, 0.003032% density, 0.003164% projected density)\n", + "2018-09-11 22:22:42,790 : INFO : PROGRESS: at 95.72% columns (443001 / 462807, 0.003064% density, 0.003191% projected density)\n", + "2018-09-11 22:22:51,720 : INFO : PROGRESS: at 95.94% columns (444001 / 462807, 0.003096% density, 0.003218% projected density)\n", + "2018-09-11 22:23:00,690 : INFO : PROGRESS: at 96.15% columns (445001 / 462807, 0.003127% density, 0.003244% projected density)\n", + "2018-09-11 22:23:09,657 : INFO : PROGRESS: at 96.37% columns (446001 / 462807, 0.003159% density, 0.003270% projected density)\n", + "2018-09-11 22:23:18,549 : INFO : PROGRESS: at 96.58% columns (447001 / 462807, 0.003190% density, 0.003295% projected density)\n", + "2018-09-11 22:23:27,341 : INFO : PROGRESS: at 96.80% columns (448001 / 462807, 0.003220% density, 0.003319% projected density)\n", + "2018-09-11 22:23:36,243 : INFO : PROGRESS: at 97.02% columns (449001 / 462807, 0.003250% density, 0.003343% projected density)\n", + "2018-09-11 22:23:44,917 : INFO : PROGRESS: at 97.23% columns (450001 / 462807, 0.003279% density, 0.003366% projected density)\n", + "2018-09-11 22:23:53,340 : INFO : PROGRESS: at 97.45% columns (451001 / 462807, 0.003306% density, 0.003387% projected density)\n", + "2018-09-11 22:24:01,698 : INFO : PROGRESS: at 97.67% columns (452001 / 462807, 0.003334% density, 0.003409% projected density)\n", + "2018-09-11 22:24:09,997 : INFO : PROGRESS: at 97.88% columns (453001 / 462807, 0.003362% density, 0.003430% projected density)\n", + "2018-09-11 22:24:18,237 : INFO : PROGRESS: at 98.10% columns (454001 / 462807, 0.003388% density, 0.003450% projected density)\n", + "2018-09-11 22:24:26,511 : INFO : PROGRESS: at 98.31% columns (455001 / 462807, 0.003415% density, 0.003470% projected density)\n", + "2018-09-11 22:24:34,673 : INFO : PROGRESS: at 98.53% columns (456001 / 462807, 0.003442% density, 0.003490% projected density)\n", + "2018-09-11 22:24:43,080 : INFO : PROGRESS: at 98.75% columns (457001 / 462807, 0.003470% density, 0.003511% projected density)\n", + "2018-09-11 22:24:51,619 : INFO : PROGRESS: at 98.96% columns (458001 / 462807, 0.003499% density, 0.003533% projected density)\n", + "2018-09-11 22:25:00,450 : INFO : PROGRESS: at 99.18% columns (459001 / 462807, 0.003530% density, 0.003557% projected density)\n", + "2018-09-11 22:25:09,232 : INFO : PROGRESS: at 99.39% columns (460001 / 462807, 0.003561% density, 0.003581% projected density)\n", + "2018-09-11 22:25:18,422 : INFO : PROGRESS: at 99.61% columns (461001 / 462807, 0.003595% density, 0.003608% projected density)\n", + "2018-09-11 22:25:27,447 : INFO : PROGRESS: at 99.83% columns (462001 / 462807, 0.003629% density, 0.003635% projected density)\n", + "2018-09-11 22:25:34,416 : INFO : constructed a sparse term similarity matrix with 0.003654% density\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 3h 30min 24s, sys: 3h 53min 52s, total: 7h 24min 17s\n", - "Wall time: 17min 30s\n" + "CPU times: user 4h 38min 32s, sys: 4h 24min 33s, total: 9h 3min 5s\n", + "Wall time: 20min 43s\n" ] } ], @@ -528,12 +1858,5501 @@ "scrolled": true }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:25:55,395 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:25:55,423 : INFO : creating matrix with 10 documents and 462807 features\n", + 
"/mnt/storage/home/novotny/.virtualenvs/gensim/lib/python3.4/site-packages/gensim/matutils.py:738: FutureWarning: Conversion of the second argument of issubdtype from `int` to `np.signedinteger` is deprecated. In future, it will be treated as `np.int64 == np.dtype(int).type`.\n", + " if np.issubdtype(vec.dtype, np.int):\n", + "2018-09-11 22:25:55,444 : INFO : Vocabulary size: 35 500\n", + "2018-09-11 22:25:55,447 : INFO : WCD\n", + "2018-09-11 22:25:55,450 : INFO : 0.0\n", + "2018-09-11 22:25:55,451 : INFO : First K WMD\n", + "2018-09-11 22:25:55,457 : INFO : creating matrix with 10 documents and 462807 features\n", + "/mnt/storage/home/novotny/.virtualenvs/gensim/lib/python3.4/site-packages/gensim/matutils.py:738: FutureWarning: Conversion of the second argument of issubdtype from `int` to `np.signedinteger` is deprecated. In future, it will be treated as `np.int64 == np.dtype(int).type`.\n", + " if np.issubdtype(vec.dtype, np.int):\n", + "2018-09-11 22:25:55,473 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:55,472 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:25:55,523 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:55,519 : INFO : [(-18.3532657623291, 6), (-17.696285247802734, 8), (-18.018333435058594, 5), (-17.275238037109375, 2), (-17.690784454345703, 4), (-17.200286865234375, 9), (-17.388216018676758, 7), (-0.0, 1), (-16.468210220336914, 0), (-17.58687973022461, 3)]\n", + "2018-09-11 22:25:55,538 : INFO : 0.1\n", + "2018-09-11 22:25:55,544 : INFO : P&P\n", + "2018-09-11 22:25:55,550 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:55,538 : INFO : Vocabulary size: 19 500\n", + "2018-09-11 22:25:55,561 : INFO : WCD\n", + "2018-09-11 22:25:55,560 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:55,571 : INFO : 0.0\n", + "2018-09-11 22:25:55,580 : INFO : First K WMD\n", + "2018-09-11 22:25:55,601 : INFO : Vocabulary size: 22 500\n", + "2018-09-11 22:25:55,611 : INFO : WCD\n", + "2018-09-11 22:25:55,601 : INFO : [(-21.988279342651367, 0), (-20.70768928527832, 6), (-20.56167984008789, 7), (-20.54451560974121, 1), (-20.21198272705078, 3), (-18.581863403320312, 5), (-20.2303466796875, 9), (-19.87332534790039, 4), (-19.796695709228516, 2), (-18.624996185302734, 8)]\n", + "2018-09-11 22:25:55,616 : INFO : 0.0\n", + "2018-09-11 22:25:55,618 : INFO : P&P\n", + "2018-09-11 22:25:55,618 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:55,619 : INFO : 0.0\n", + "2018-09-11 22:25:55,619 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:55,620 : INFO : First K WMD\n", + "2018-09-11 22:25:55,627 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:55,631 : INFO : [(-21.16857147216797, 4), (-20.984590530395508, 8), (-19.005477905273438, 2), (-19.81128692626953, 7), (-18.952373504638672, 5), (-17.643348693847656, 6), (-0.0, 1), (-17.959074020385742, 0), (-19.516456604003906, 3), (-18.704994201660156, 9)]\n", + "2018-09-11 22:25:55,633 : INFO : 0.0\n", + "2018-09-11 22:25:55,634 : INFO : P&P\n", + "2018-09-11 22:25:55,636 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:55,646 : INFO : Vocabulary size: 14 500\n", + "2018-09-11 22:25:55,648 : INFO : WCD\n", + "2018-09-11 22:25:55,649 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:55,651 : INFO : 0.0\n", + "2018-09-11 22:25:55,653 : INFO : First K WMD\n", 
+ "2018-09-11 22:25:55,661 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:55,661 : INFO : Vocabulary size: 35 500\n", + "2018-09-11 22:25:55,663 : INFO : WCD\n", + "2018-09-11 22:25:55,663 : INFO : [(-18.646310806274414, 2), (-18.35845375061035, 8), (-17.172510147094727, 3), (-16.983802795410156, 5), (-18.001632690429688, 4), (-16.23831558227539, 9), (-15.49134349822998, 7), (-15.965887069702148, 0), (-16.253746032714844, 6), (-17.6544189453125, 1)]\n", + "2018-09-11 22:25:55,665 : INFO : 0.0\n", + "2018-09-11 22:25:55,666 : INFO : 0.0\n", + "2018-09-11 22:25:55,666 : INFO : P&P\n", + "2018-09-11 22:25:55,667 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:55,667 : INFO : First K WMD\n", + "2018-09-11 22:25:55,680 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:55,683 : INFO : [(-19.602481842041016, 9), (-17.52560806274414, 8), (-15.159758567810059, 0), (-15.154800415039062, 4), (-17.124284744262695, 2), (-14.304398536682129, 7), (-12.30762767791748, 6), (-14.262770652770996, 5), (-0.0, 1), (-15.9633150100708, 3)]\n", + "2018-09-11 22:25:55,684 : INFO : 0.0\n", + "2018-09-11 22:25:55,686 : INFO : P&P\n", + "2018-09-11 22:25:55,687 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:55,691 : INFO : Vocabulary size: 10 500\n", + "2018-09-11 22:25:55,691 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:55,693 : INFO : WCD\n", + "2018-09-11 22:25:55,695 : INFO : 0.0\n", + "2018-09-11 22:25:55,697 : INFO : First K WMD\n", + "2018-09-11 22:25:55,706 : INFO : [(-22.006179809570312, 4), (-20.733144760131836, 3), (-21.352088928222656, 0), (-19.656925201416016, 6), (-19.52262306213379, 8), (-18.563873291015625, 5), (-19.805952072143555, 7), (-18.82122802734375, 1), (-17.739513397216797, 2), (-18.486942291259766, 9)]\n", + "2018-09-11 22:25:55,708 : INFO : 0.0\n", + "2018-09-11 22:25:55,709 : INFO : P&P\n", + "2018-09-11 22:25:55,711 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:55,713 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:55,717 : INFO : Vocabulary size: 11 500\n", + "2018-09-11 22:25:55,719 : INFO : WCD\n", + "2018-09-11 22:25:55,722 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:55,722 : INFO : 0.0\n", + "2018-09-11 22:25:55,724 : INFO : First K WMD\n", + "2018-09-11 22:25:55,732 : INFO : Vocabulary size: 7 500\n", + "2018-09-11 22:25:55,734 : INFO : WCD\n", + "2018-09-11 22:25:55,734 : INFO : [(-19.538833618164062, 6), (-19.469524383544922, 3), (-18.020339965820312, 9), (-19.16213607788086, 2), (-17.485410690307617, 7), (-15.716273307800293, 1), (-17.09773826599121, 8), (-12.16711711883545, 4), (-16.968128204345703, 5), (-14.38237476348877, 0)]\n", + "2018-09-11 22:25:55,736 : INFO : 0.0\n", + "2018-09-11 22:25:55,737 : INFO : 0.0\n", + "2018-09-11 22:25:55,737 : INFO : P&P\n", + "2018-09-11 22:25:55,738 : INFO : First K WMD\n", + "2018-09-11 22:25:55,738 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:55,746 : INFO : [(-17.66437530517578, 7), (-16.575199127197266, 9), (-17.5963191986084, 0), (-16.198196411132812, 4), (-15.999327659606934, 5), (-16.308128356933594, 1), (-16.7314510345459, 6), (-12.109302520751953, 3), (-16.137367248535156, 8), (-11.240537643432617, 2)]\n", + "2018-09-11 22:25:55,746 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:55,747 : INFO : 0.0\n", + "2018-09-11 
22:25:55,748 : INFO : P&P\n", + "2018-09-11 22:25:55,749 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:55,754 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:55,763 : INFO : Vocabulary size: 6 500\n", + "2018-09-11 22:25:55,764 : INFO : WCD\n", + "2018-09-11 22:25:55,767 : INFO : 0.0\n", + "2018-09-11 22:25:55,769 : INFO : First K WMD\n", + "2018-09-11 22:25:55,773 : INFO : Vocabulary size: 24 500\n", + "2018-09-11 22:25:55,774 : INFO : WCD\n", + "2018-09-11 22:25:55,776 : INFO : [(-23.43346405029297, 8), (-23.24972915649414, 3), (-22.001794815063477, 6), (-21.205509185791016, 4), (-23.23835563659668, 7), (-19.696102142333984, 5), (-20.61865234375, 2), (-19.96299171447754, 1), (-0.0, 0), (-22.795507431030273, 9)]\n", + "2018-09-11 22:25:55,777 : INFO : 0.0\n", + "2018-09-11 22:25:55,777 : INFO : 0.0\n", + "2018-09-11 22:25:55,778 : INFO : First K WMD\n", + "2018-09-11 22:25:55,779 : INFO : P&P\n", + "2018-09-11 22:25:55,780 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:55,780 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:55,787 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:55,791 : INFO : [(-20.154441833496094, 7), (-19.807659149169922, 3), (-18.77899169921875, 5), (-19.65380859375, 8), (-18.29581069946289, 9), (-18.18106460571289, 2), (-17.32356834411621, 6), (-14.252035140991211, 1), (-16.55240821838379, 0), (-16.249374389648438, 4)]\n", + "2018-09-11 22:25:55,793 : INFO : 0.0\n", + "2018-09-11 22:25:55,794 : INFO : P&P\n", + "2018-09-11 22:25:55,795 : INFO : stopped by early_stop condition\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:25:55,803 : INFO : Vocabulary size: 3 500\n", + "2018-09-11 22:25:55,804 : INFO : WCD\n", + "2018-09-11 22:25:55,807 : INFO : 0.0\n", + "2018-09-11 22:25:55,808 : INFO : First K WMD\n", + "2018-09-11 22:25:55,813 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:55,815 : INFO : [(-23.323942184448242, 1), (-23.062299728393555, 2), (-23.259197235107422, 4), (-22.928569793701172, 8), (-22.76712989807129, 7), (-22.777070999145508, 6), (-22.7784423828125, 3), (-21.306692123413086, 0), (-22.706424713134766, 5), (-20.88772201538086, 9)]\n", + "2018-09-11 22:25:55,817 : INFO : 0.0\n", + "2018-09-11 22:25:55,818 : INFO : P&P\n", + "2018-09-11 22:25:55,819 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:55,820 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:55,822 : INFO : Vocabulary size: 21 500\n", + "2018-09-11 22:25:55,824 : INFO : WCD\n", + "2018-09-11 22:25:55,826 : INFO : 0.0\n", + "2018-09-11 22:25:55,828 : INFO : First K WMD\n", + "2018-09-11 22:25:55,841 : INFO : [(-21.382509231567383, 0), (-20.77737045288086, 9), (-19.848779678344727, 8), (-20.041160583496094, 6), (-20.022632598876953, 5), (-16.837387084960938, 2), (-18.998977661132812, 7), (-19.660579681396484, 1), (-18.9355411529541, 4), (-19.273841857910156, 3)]\n", + "2018-09-11 22:25:55,843 : INFO : 0.0\n", + "2018-09-11 22:25:55,844 : INFO : P&P\n", + "2018-09-11 22:25:55,845 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:55,845 : INFO : Vocabulary size: 13 500\n", + "2018-09-11 22:25:55,846 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:55,847 : INFO : WCD\n", + "2018-09-11 22:25:55,850 : INFO : 0.0\n", + "2018-09-11 22:25:55,850 : INFO : creating 
matrix with 10 documents and 462807 features\n",
+        "[... several hundred repeated gensim WMD query log lines omitted (Vocabulary size, WCD, First K WMD, P&P, stopped by early_stop condition, creating matrix with 10 documents and 462807 features) ...]\n"
+       ]
+      },
+      {
+       "name": "stderr",
+       "output_type": "stream",
+       "text": [
+        
"2018-09-11 22:25:57,995 : INFO : 0.0\n", + "2018-09-11 22:25:57,996 : INFO : P&P\n", + "2018-09-11 22:25:57,997 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:57,998 : INFO : Vocabulary size: 17 500\n", + "2018-09-11 22:25:57,998 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:25:57,999 : INFO : WCD\n", + "2018-09-11 22:25:58,000 : INFO : built Dictionary(58 unique tokens: ['bayt', 'tell', 'almost', 'cv', 'respond']...) from 2 documents (total 74 corpus positions)\n", + "2018-09-11 22:25:58,001 : INFO : 0.0\n", + "2018-09-11 22:25:58,002 : INFO : First K WMD\n", + "2018-09-11 22:25:58,008 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:58,009 : INFO : [(-23.84600830078125, 3), (-21.9279727935791, 7), (-23.31334686279297, 8), (-21.53450584411621, 6), (-20.583724975585938, 5), (-20.156784057617188, 9), (-19.488155364990234, 1), (-20.923620223999023, 4), (-0.0, 0), (-19.14923667907715, 2)]\n", + "2018-09-11 22:25:58,011 : INFO : 0.0\n", + "2018-09-11 22:25:58,011 : INFO : P&P\n", + "2018-09-11 22:25:58,012 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,018 : INFO : Vocabulary size: 13 500\n", + "2018-09-11 22:25:58,019 : INFO : WCD\n", + "2018-09-11 22:25:58,021 : INFO : 0.0\n", + "2018-09-11 22:25:58,022 : INFO : First K WMD\n", + "2018-09-11 22:25:58,031 : INFO : [(-18.945056915283203, 0), (-18.631534576416016, 8), (-16.424213409423828, 6), (-18.608795166015625, 2), (-18.58013916015625, 9), (-16.361299514770508, 3), (-16.361759185791016, 4), (-17.40289306640625, 7), (-18.337038040161133, 5), (-18.180692672729492, 1)]\n", + "2018-09-11 22:25:58,032 : INFO : 0.0\n", + "2018-09-11 22:25:58,033 : INFO : P&P\n", + "2018-09-11 22:25:58,032 : INFO : Vocabulary size: 27 500\n", + "2018-09-11 22:25:58,034 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,034 : INFO : WCD\n", + "2018-09-11 22:25:58,035 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:58,036 : INFO : 0.0\n", + "2018-09-11 22:25:58,037 : INFO : First K WMD\n", + "2018-09-11 22:25:58,048 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:25:58,049 : INFO : [(-20.40915298461914, 6), (-19.64225959777832, 7), (-20.07440948486328, 5), (-18.939409255981445, 9), (-18.872684478759766, 3), (-19.419071197509766, 1), (-18.93073844909668, 0), (-18.43320655822754, 8), (-18.806991577148438, 4), (-18.65021514892578, 2)]\n", + "2018-09-11 22:25:58,050 : INFO : built Dictionary(35 unique tokens: ['getting', 'get', 'decent', 'bayt', 'almost']...) from 2 documents (total 78 corpus positions)\n", + "2018-09-11 22:25:58,050 : INFO : 0.0\n", + "2018-09-11 22:25:58,051 : INFO : P&P\n", + "2018-09-11 22:25:58,052 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,054 : INFO : Vocabulary size: 14 500\n", + "2018-09-11 22:25:58,055 : INFO : WCD\n", + "2018-09-11 22:25:58,057 : INFO : 0.0\n", + "2018-09-11 22:25:58,057 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:25:58,058 : INFO : First K WMD\n", + "2018-09-11 22:25:58,059 : INFO : built Dictionary(31 unique tokens: ['get', 'know', 'greece', 'link', 'long']...) 
from 2 documents (total 39 corpus positions)\n", + "2018-09-11 22:25:58,060 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:58,067 : INFO : [(-22.628143310546875, 5), (-22.15297508239746, 3), (-21.49272918701172, 9), (-20.9246826171875, 8), (-20.19110107421875, 4), (-18.593515396118164, 1), (-20.259952545166016, 6), (-20.374250411987305, 7), (-18.432727813720703, 2), (-18.0457763671875, 0)]\n", + "2018-09-11 22:25:58,068 : INFO : 0.0\n", + "2018-09-11 22:25:58,069 : INFO : P&P\n", + "2018-09-11 22:25:58,070 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,070 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:25:58,072 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:25:58,072 : INFO : Vocabulary size: 26 500\n", + "2018-09-11 22:25:58,073 : INFO : built Dictionary(28 unique tokens: ['transit', 'know', 'link', 'explain', 'bona']...) from 2 documents (total 32 corpus positions)\n", + "2018-09-11 22:25:58,073 : INFO : WCD\n", + "2018-09-11 22:25:58,075 : INFO : 0.0\n", + "2018-09-11 22:25:58,076 : INFO : First K WMD\n", + "2018-09-11 22:25:58,083 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:25:58,085 : INFO : built Dictionary(26 unique tokens: ['qatar', 'vacation', 'link', 'explain', 'bona']...) from 2 documents (total 32 corpus positions)\n", + "2018-09-11 22:25:58,088 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:58,088 : INFO : [(-17.99260711669922, 6), (-17.80816650390625, 8), (-17.791017532348633, 2), (-17.452917098999023, 3), (-17.327585220336914, 0), (-17.523666381835938, 5), (-17.710594177246094, 7), (-17.26841163635254, 1), (-16.89826011657715, 9), (-16.92534828186035, 4)]\n", + "2018-09-11 22:25:58,089 : INFO : 0.0\n", + "2018-09-11 22:25:58,090 : INFO : P&P\n", + "2018-09-11 22:25:58,091 : INFO : Vocabulary size: 6 500\n", + "2018-09-11 22:25:58,091 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,092 : INFO : WCD\n", + "2018-09-11 22:25:58,093 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:25:58,094 : INFO : 0.0\n", + "2018-09-11 22:25:58,094 : INFO : First K WMD\n", + "2018-09-11 22:25:58,094 : INFO : built Dictionary(53 unique tokens: ['bundle', 'ok', 'link', 'forgets', 'care']...) from 2 documents (total 63 corpus positions)\n", + "2018-09-11 22:25:58,097 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:25:58,099 : INFO : built Dictionary(70 unique tokens: ['work', 'actually', 'bayt', 'tell', 'almost']...) 
from 2 documents (total 90 corpus positions)\n", + "2018-09-11 22:25:58,100 : INFO : [(-19.72264289855957, 7), (-17.86649513244629, 2), (-18.828950881958008, 9), (-16.582509994506836, 3), (-16.81943702697754, 6), (-18.049455642700195, 8), (-16.324005126953125, 4), (-14.684280395507812, 0), (-15.579146385192871, 1), (-12.827495574951172, 5)]\n", + "2018-09-11 22:25:58,102 : INFO : 0.0\n", + "2018-09-11 22:25:58,103 : INFO : P&P\n", + "2018-09-11 22:25:58,103 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,110 : INFO : Vocabulary size: 12 500\n", + "2018-09-11 22:25:58,111 : INFO : WCD\n", + "2018-09-11 22:25:58,113 : INFO : 0.0\n", + "2018-09-11 22:25:58,114 : INFO : First K WMD\n", + "2018-09-11 22:25:58,115 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:58,121 : INFO : [(-21.624691009521484, 4), (-20.768754959106445, 2), (-20.423534393310547, 7), (-20.320222854614258, 1), (-19.673490524291992, 3), (-19.364986419677734, 9), (-19.676713943481445, 8), (-19.16258430480957, 5), (-17.598079681396484, 6), (-0.0, 0)]\n", + "2018-09-11 22:25:58,122 : INFO : 0.0\n", + "2018-09-11 22:25:58,123 : INFO : P&P\n", + "2018-09-11 22:25:58,124 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,124 : INFO : Vocabulary size: 18 500\n", + "2018-09-11 22:25:58,125 : INFO : WCD\n", + "2018-09-11 22:25:58,127 : INFO : 0.0\n", + "2018-09-11 22:25:58,128 : INFO : First K WMD\n", + "2018-09-11 22:25:58,129 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:25:58,131 : INFO : built Dictionary(56 unique tokens: ['hotel', 'vacation', 'link', 'riyals', 'alone']...) from 2 documents (total 69 corpus positions)\n", + "2018-09-11 22:25:58,137 : INFO : [(-17.326887130737305, 8), (-16.12946891784668, 7), (-16.58684730529785, 6), (-15.504143714904785, 1), (-15.795284271240234, 0), (-16.041828155517578, 9), (-15.549420356750488, 5), (-14.6519136428833, 4), (-15.186991691589355, 3), (-14.063722610473633, 2)]\n", + "2018-09-11 22:25:58,138 : INFO : 0.0\n", + "2018-09-11 22:25:58,139 : INFO : P&P\n", + "2018-09-11 22:25:58,140 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,141 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:58,145 : INFO : Vocabulary size: 11 500\n", + "2018-09-11 22:25:58,146 : INFO : WCD\n", + "2018-09-11 22:25:58,148 : INFO : 0.0\n", + "2018-09-11 22:25:58,149 : INFO : First K WMD\n", + "2018-09-11 22:25:58,156 : INFO : [(-21.386402130126953, 9), (-21.370580673217773, 5), (-21.249544143676758, 8), (-20.429731369018555, 1), (-21.061601638793945, 7), (-19.711387634277344, 4), (-19.849960327148438, 2), (-0.0, 0), (-19.86851692199707, 6), (-19.881818771362305, 3)]\n", + "2018-09-11 22:25:58,158 : INFO : 0.0\n", + "2018-09-11 22:25:58,158 : INFO : Removed 2 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:25:58,159 : INFO : P&P\n", + "2018-09-11 22:25:58,159 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:25:58,159 : INFO : Vocabulary size: 14 500\n", + "2018-09-11 22:25:58,159 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,160 : INFO : built Dictionary(40 unique tokens: ['getting', 'get', 'also', 'bayt', 'back']...) 
from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:25:58,160 : INFO : WCD\n", + "2018-09-11 22:25:58,162 : INFO : 0.0\n", + "2018-09-11 22:25:58,163 : INFO : First K WMD\n", + "2018-09-11 22:25:58,166 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:25:58,167 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:58,168 : INFO : built Dictionary(35 unique tokens: ['qualify', 'someone', 'qatar', 'explain', 'vacation']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:25:58,170 : INFO : [(-22.46072006225586, 7), (-19.800731658935547, 9), (-18.909976959228516, 6), (-19.044780731201172, 8), (-17.199432373046875, 5), (-12.679047584533691, 3), (-18.82349967956543, 4), (-15.129741668701172, 2), (-15.364179611206055, 0), (-13.677042007446289, 1)]\n", + "2018-09-11 22:25:58,171 : INFO : 0.0\n", + "2018-09-11 22:25:58,171 : INFO : P&P\n", + "2018-09-11 22:25:58,171 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:25:58,172 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,172 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:25:58,173 : INFO : built Dictionary(57 unique tokens: ['selected', 'almost', 'cv', 'decent', 'police']...) from 2 documents (total 68 corpus positions)\n", + "2018-09-11 22:25:58,179 : INFO : Vocabulary size: 35 500\n", + "2018-09-11 22:25:58,180 : INFO : WCD\n", + "2018-09-11 22:25:58,182 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:25:58,182 : INFO : 0.0\n", + "2018-09-11 22:25:58,183 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:25:58,183 : INFO : First K WMD\n", + "2018-09-11 22:25:58,184 : INFO : built Dictionary(52 unique tokens: ['link', 'please', 'variety', 'project', 'convinced']...) from 2 documents (total 60 corpus positions)\n", + "2018-09-11 22:25:58,191 : INFO : Vocabulary size: 8 500\n", + "2018-09-11 22:25:58,192 : INFO : WCD\n", + "2018-09-11 22:25:58,193 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:58,194 : INFO : 0.0\n", + "2018-09-11 22:25:58,195 : INFO : First K WMD\n", + "2018-09-11 22:25:58,196 : INFO : [(-19.923450469970703, 3), (-19.591402053833008, 9), (-19.621749877929688, 8), (-17.76327896118164, 2), (-18.80369758605957, 6), (-18.084077835083008, 7), (-15.335987091064453, 1), (-0.0, 0), (-17.16387939453125, 4), (-18.287742614746094, 5)]\n", + "2018-09-11 22:25:58,197 : INFO : 0.0\n", + "2018-09-11 22:25:58,198 : INFO : P&P\n", + "2018-09-11 22:25:58,199 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,200 : INFO : [(-20.864824295043945, 5), (-19.79900550842285, 8), (-20.41412353515625, 6), (-19.539037704467773, 7), (-18.403043746948242, 9), (-16.76972770690918, 4), (-19.58506202697754, 2), (-11.653465270996094, 0), (-12.726058006286621, 1), (-17.847679138183594, 3)]\n", + "2018-09-11 22:25:58,201 : INFO : 0.0\n", + "2018-09-11 22:25:58,202 : INFO : P&P\n", + "2018-09-11 22:25:58,203 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,208 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:25:58,210 : INFO : built Dictionary(67 unique tokens: ['bayt', 'almost', 'speaking', 'years', 'decent']...) 
from 2 documents (total 75 corpus positions)\n", + "2018-09-11 22:25:58,211 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:25:58,212 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:25:58,213 : INFO : built Dictionary(45 unique tokens: ['work', 'qnb', 'course', 'link', 'cards']...) from 2 documents (total 60 corpus positions)\n", + "2018-09-11 22:25:58,220 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:58,221 : INFO : Vocabulary size: 36 500\n", + "2018-09-11 22:25:58,222 : INFO : WCD\n", + "2018-09-11 22:25:58,224 : INFO : Vocabulary size: 15 500\n", + "2018-09-11 22:25:58,224 : INFO : 0.0\n", + "2018-09-11 22:25:58,225 : INFO : WCD\n", + "2018-09-11 22:25:58,225 : INFO : First K WMD\n", + "2018-09-11 22:25:58,227 : INFO : 0.0\n", + "2018-09-11 22:25:58,227 : INFO : First K WMD\n", + "2018-09-11 22:25:58,235 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:25:58,236 : INFO : [(-21.4023494720459, 5), (-20.61532211303711, 7), (-20.2412109375, 3), (-20.37481689453125, 6), (-18.52897071838379, 2), (-19.522056579589844, 4), (-19.811962127685547, 8), (-19.67626953125, 9), (-16.742000579833984, 1), (-15.945027351379395, 0)]\n", + "2018-09-11 22:25:58,236 : INFO : built Dictionary(52 unique tokens: ['filled', 'ok', 'form', 'received', 'contacts']...) from 2 documents (total 64 corpus positions)\n", + "2018-09-11 22:25:58,237 : INFO : 0.0\n", + "2018-09-11 22:25:58,237 : INFO : [(-20.831825256347656, 2), (-19.78970718383789, 1), (-20.186187744140625, 6), (-18.98549461364746, 9), (-17.76701545715332, 5), (-18.715290069580078, 3), (-19.431543350219727, 8), (-18.426170349121094, 7), (-0.0, 0), (-17.453073501586914, 4)]\n", + "2018-09-11 22:25:58,238 : INFO : P&P\n", + "2018-09-11 22:25:58,238 : INFO : 0.0\n", + "2018-09-11 22:25:58,238 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,239 : INFO : P&P\n", + "2018-09-11 22:25:58,240 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,247 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:58,259 : INFO : Vocabulary size: 13 500\n", + "2018-09-11 22:25:58,259 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:25:58,259 : INFO : Vocabulary size: 5 500\n", + "2018-09-11 22:25:58,260 : INFO : WCD\n", + "2018-09-11 22:25:58,261 : INFO : built Dictionary(48 unique tokens: ['bayt', 'better', 'almost', 'cv', 'trust']...) from 2 documents (total 57 corpus positions)\n", + "2018-09-11 22:25:58,261 : INFO : WCD\n", + "2018-09-11 22:25:58,262 : INFO : 0.0\n", + "2018-09-11 22:25:58,263 : INFO : 0.0\n", + "2018-09-11 22:25:58,263 : INFO : First K WMD\n", + "2018-09-11 22:25:58,264 : INFO : First K WMD\n", + "2018-09-11 22:25:58,264 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:25:58,265 : INFO : built Dictionary(58 unique tokens: ['vacation', 'link', 'soon', 'contacts', 'suggest']...) 
from 2 documents (total 71 corpus positions)\n", + "2018-09-11 22:25:58,269 : INFO : [(-19.098276138305664, 5), (-17.65677833557129, 0), (-16.656993865966797, 2), (-17.410911560058594, 4), (-15.904337882995605, 8), (-16.37093162536621, 6), (-16.18189239501953, 1), (-11.612298011779785, 9), (-15.42251968383789, 3), (-15.863113403320312, 7)]\n", + "2018-09-11 22:25:58,270 : INFO : 0.0\n", + "2018-09-11 22:25:58,270 : INFO : P&P\n", + "2018-09-11 22:25:58,271 : INFO : [(-21.22176170349121, 7), (-20.88051986694336, 8), (-19.66781997680664, 6), (-20.613101959228516, 5), (-19.393814086914062, 3), (-18.846433639526367, 4), (-18.52078628540039, 1), (-20.223817825317383, 2), (-16.708871841430664, 0), (-19.289073944091797, 9)]\n", + "2018-09-11 22:25:58,271 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,271 : INFO : 0.0\n", + "2018-09-11 22:25:58,272 : INFO : P&P\n", + "2018-09-11 22:25:58,273 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,275 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:58,281 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:25:58,283 : INFO : built Dictionary(63 unique tokens: ['know', 'bayt', 'almost', 'cv', 'decent']...) from 2 documents (total 77 corpus positions)\n", + "2018-09-11 22:25:58,291 : INFO : Vocabulary size: 17 500\n", + "2018-09-11 22:25:58,292 : INFO : WCD\n", + "2018-09-11 22:25:58,293 : INFO : Vocabulary size: 14 500\n", + "2018-09-11 22:25:58,294 : INFO : 0.0\n", + "2018-09-11 22:25:58,294 : INFO : WCD\n", + "2018-09-11 22:25:58,295 : INFO : First K WMD\n", + "2018-09-11 22:25:58,296 : INFO : 0.0\n", + "2018-09-11 22:25:58,297 : INFO : First K WMD\n", + "2018-09-11 22:25:58,297 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:25:58,301 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:58,303 : INFO : [(-20.8459529876709, 4), (-20.228158950805664, 9), (-18.002117156982422, 1), (-19.179546356201172, 6), (-18.391809463500977, 5), (-17.92734718322754, 8), (-17.91477394104004, 3), (-18.717327117919922, 7), (-17.90180778503418, 2), (-0.0, 0)]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:25:58,304 : INFO : 0.0\n", + "2018-09-11 22:25:58,305 : INFO : P&P\n", + "2018-09-11 22:25:58,305 : INFO : [(-21.058446884155273, 6), (-20.922012329101562, 1), (-19.987409591674805, 3), (-18.932270050048828, 9), (-17.863933563232422, 5), (-18.062917709350586, 2), (-19.771121978759766, 7), (-18.137680053710938, 8), (-18.860422134399414, 4), (-17.472457885742188, 0)]\n", + "2018-09-11 22:25:58,306 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,306 : INFO : 0.0\n", + "2018-09-11 22:25:58,307 : INFO : P&P\n", + "2018-09-11 22:25:58,307 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,327 : INFO : Vocabulary size: 17 500\n", + "2018-09-11 22:25:58,327 : INFO : Vocabulary size: 12 500\n", + "2018-09-11 22:25:58,327 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:25:58,328 : INFO : WCD\n", + "2018-09-11 22:25:58,328 : INFO : WCD\n", + "2018-09-11 22:25:58,328 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:58,329 : INFO : built Dictionary(57 unique tokens: ['entry', 'bayt', 'ok', 'almost', 'cv']...) 
from 2 documents (total 68 corpus positions)\n", + "2018-09-11 22:25:58,330 : INFO : 0.0\n", + "2018-09-11 22:25:58,330 : INFO : 0.0\n", + "2018-09-11 22:25:58,331 : INFO : First K WMD\n", + "2018-09-11 22:25:58,331 : INFO : First K WMD\n", + "2018-09-11 22:25:58,340 : INFO : [(-18.975263595581055, 6), (-18.88460922241211, 7), (-18.263364791870117, 2), (-18.832904815673828, 9), (-18.321569442749023, 8), (-16.734058380126953, 1), (-17.165796279907227, 4), (-16.804899215698242, 0), (-18.167964935302734, 5), (-15.269024848937988, 3)]\n", + "2018-09-11 22:25:58,340 : INFO : [(-21.296541213989258, 2), (-20.57115936279297, 5), (-19.895736694335938, 6), (-20.29153823852539, 9), (-20.339290618896484, 8), (-18.552072525024414, 1), (-19.013574600219727, 0), (-19.991029739379883, 4), (-19.67312240600586, 7), (-19.18238067626953, 3)]\n", + "2018-09-11 22:25:58,341 : INFO : 0.0\n", + "2018-09-11 22:25:58,341 : INFO : 0.0\n", + "2018-09-11 22:25:58,342 : INFO : P&P\n", + "2018-09-11 22:25:58,342 : INFO : P&P\n", + "2018-09-11 22:25:58,342 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,343 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,353 : INFO : creating matrix with 10 documents and 462807 features\n", + "2018-09-11 22:25:58,362 : INFO : Vocabulary size: 16 500\n", + "2018-09-11 22:25:58,364 : INFO : WCD\n", + "2018-09-11 22:25:58,363 : INFO : Vocabulary size: 32 500\n", + "2018-09-11 22:25:58,365 : INFO : WCD\n", + "2018-09-11 22:25:58,366 : INFO : 0.0\n", + "2018-09-11 22:25:58,366 : INFO : First K WMD\n", + "2018-09-11 22:25:58,367 : INFO : 0.0\n", + "2018-09-11 22:25:58,367 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:25:58,368 : INFO : First K WMD\n", + "2018-09-11 22:25:58,368 : INFO : built Dictionary(57 unique tokens: ['know', 'bayt', 'better', 'almost', 'cv']...) 
from 2 documents (total 74 corpus positions)\n", + "2018-09-11 22:25:58,375 : INFO : [(-19.403745651245117, 2), (-18.52483558654785, 6), (-18.284198760986328, 8), (-18.021696090698242, 5), (-16.991546630859375, 3), (-16.766653060913086, 0), (-16.994873046875, 9), (-18.010400772094727, 7), (-17.216787338256836, 1), (-16.844579696655273, 4)]\n", + "2018-09-11 22:25:58,376 : INFO : 0.0\n", + "2018-09-11 22:25:58,377 : INFO : P&P\n", + "2018-09-11 22:25:58,377 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,380 : INFO : [(-19.267576217651367, 6), (-18.865331649780273, 7), (-17.662851333618164, 8), (-18.69745445251465, 5), (-18.74102783203125, 4), (-16.958696365356445, 1), (-16.768390655517578, 2), (-16.4675350189209, 9), (-15.180002212524414, 3), (-0.0, 0)]\n", + "2018-09-11 22:25:58,382 : INFO : 0.0\n", + "2018-09-11 22:25:58,383 : INFO : P&P\n", + "2018-09-11 22:25:58,383 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,397 : INFO : Vocabulary size: 12 500\n", + "2018-09-11 22:25:58,398 : INFO : WCD\n", + "2018-09-11 22:25:58,400 : INFO : 0.0\n", + "2018-09-11 22:25:58,401 : INFO : First K WMD\n", + "2018-09-11 22:25:58,403 : INFO : Vocabulary size: 13 500\n", + "2018-09-11 22:25:58,404 : INFO : WCD\n", + "2018-09-11 22:25:58,404 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:25:58,406 : INFO : 0.0\n", + "2018-09-11 22:25:58,407 : INFO : First K WMD\n", + "2018-09-11 22:25:58,408 : INFO : [(-20.162593841552734, 4), (-18.168127059936523, 6), (-19.348899841308594, 2), (-17.741790771484375, 3), (-18.010265350341797, 7), (-17.766538619995117, 9), (-18.828824996948242, 8), (-16.618709564208984, 0), (-16.859935760498047, 5), (-17.24017906188965, 1)]\n", + "2018-09-11 22:25:58,409 : INFO : 0.0\n", + "2018-09-11 22:25:58,410 : INFO : P&P\n", + "2018-09-11 22:25:58,411 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,415 : INFO : [(-21.9173583984375, 1), (-19.927223205566406, 3), (-19.455461502075195, 7), (-19.831680297851562, 4), (-18.729202270507812, 5), (-19.096237182617188, 0), (-18.28215217590332, 2), (-18.692344665527344, 6), (-18.678312301635742, 9), (-18.080745697021484, 8)]\n", + "2018-09-11 22:25:58,416 : INFO : 0.0\n", + "2018-09-11 22:25:58,417 : INFO : P&P\n", + "2018-09-11 22:25:58,418 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,430 : INFO : Vocabulary size: 19 500\n", + "2018-09-11 22:25:58,432 : INFO : WCD\n", + "2018-09-11 22:25:58,434 : INFO : 0.0\n", + "2018-09-11 22:25:58,434 : INFO : First K WMD\n", + "2018-09-11 22:25:58,436 : INFO : Vocabulary size: 24 500\n", + "2018-09-11 22:25:58,437 : INFO : WCD\n", + "2018-09-11 22:25:58,439 : INFO : 0.0\n", + "2018-09-11 22:25:58,440 : INFO : First K WMD\n", + "2018-09-11 22:25:58,444 : INFO : [(-19.88253402709961, 7), (-19.44063949584961, 2), (-19.006885528564453, 8), (-18.25164031982422, 9), (-17.398868560791016, 0), (-18.194162368774414, 5), (-17.272537231445312, 6), (-17.853717803955078, 4), (-18.053218841552734, 3), (-16.743635177612305, 1)]\n", + "2018-09-11 22:25:58,445 : INFO : 0.0\n", + "2018-09-11 22:25:58,445 : INFO : P&P\n", + "2018-09-11 22:25:58,446 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,448 : INFO : [(-20.637126922607422, 7), (-18.52520751953125, 3), (-19.945568084716797, 6), (-18.422931671142578, 8), (-17.84210777282715, 1), (-19.276573181152344, 9), (-15.418831825256348, 0), (-16.296709060668945, 5), (-16.771074295043945, 2), (-0.0, 4)]\n", + "2018-09-11 22:25:58,449 : INFO : 0.0\n", + 
"2018-09-11 22:25:58,450 : INFO : P&P\n", + "2018-09-11 22:25:58,450 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,465 : INFO : Vocabulary size: 17 500\n", + "2018-09-11 22:25:58,466 : INFO : WCD\n", + "2018-09-11 22:25:58,468 : INFO : Vocabulary size: 6 500\n", + "2018-09-11 22:25:58,468 : INFO : 0.0\n", + "2018-09-11 22:25:58,469 : INFO : WCD\n", + "2018-09-11 22:25:58,469 : INFO : First K WMD\n", + "2018-09-11 22:25:58,471 : INFO : 0.0\n", + "2018-09-11 22:25:58,472 : INFO : First K WMD\n", + "2018-09-11 22:25:58,477 : INFO : [(-20.844030380249023, 3), (-18.84214973449707, 2), (-19.757312774658203, 1), (-18.419343948364258, 7), (-17.478425979614258, 0), (-16.97471046447754, 5), (-18.42815399169922, 6), (-14.489492416381836, 9), (-16.982568740844727, 8), (-17.407611846923828, 4)]\n", + "2018-09-11 22:25:58,478 : INFO : [(-20.63796043395996, 8), (-17.62754249572754, 3), (-16.24697494506836, 0), (-16.80132293701172, 2), (-16.802032470703125, 7), (-15.279682159423828, 1), (-15.229570388793945, 5), (-16.733245849609375, 9), (-16.687870025634766, 6), (-14.57418441772461, 4)]\n", + "2018-09-11 22:25:58,478 : INFO : 0.0\n", + "2018-09-11 22:25:58,479 : INFO : 0.0\n", + "2018-09-11 22:25:58,479 : INFO : P&P\n", + "2018-09-11 22:25:58,479 : INFO : P&P\n", + "2018-09-11 22:25:58,479 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,480 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,497 : INFO : Vocabulary size: 10 500\n", + "2018-09-11 22:25:58,498 : INFO : WCD\n", + "2018-09-11 22:25:58,499 : INFO : Vocabulary size: 8 500\n", + "2018-09-11 22:25:58,500 : INFO : WCD\n", + "2018-09-11 22:25:58,500 : INFO : 0.0\n", + "2018-09-11 22:25:58,501 : INFO : First K WMD\n", + "2018-09-11 22:25:58,502 : INFO : 0.0\n", + "2018-09-11 22:25:58,503 : INFO : First K WMD\n", + "2018-09-11 22:25:58,515 : INFO : [(-19.079740524291992, 7), (-18.612424850463867, 9), (-16.35387420654297, 6), (-16.744731903076172, 3), (-17.912837982177734, 4), (-0.0, 0), (-15.7976713180542, 5), (-15.564203262329102, 8), (-15.525650978088379, 1), (-15.550748825073242, 2)]\n", + "2018-09-11 22:25:58,518 : INFO : 0.0\n", + "2018-09-11 22:25:58,520 : INFO : P&P\n", + "2018-09-11 22:25:58,521 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,510 : INFO : [(-22.172027587890625, 4), (-21.61946678161621, 9), (-19.401174545288086, 8), (-20.109725952148438, 0), (-19.69805335998535, 3), (-18.986125946044922, 1), (-18.151046752929688, 7), (-17.19361686706543, 6), (-19.390060424804688, 5), (-19.571441650390625, 2)]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:25:58,526 : INFO : 0.0\n", + "2018-09-11 22:25:58,529 : INFO : P&P\n", + "2018-09-11 22:25:58,534 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,550 : INFO : Vocabulary size: 15 500\n", + "2018-09-11 22:25:58,570 : INFO : Vocabulary size: 8 500\n", + "2018-09-11 22:25:58,572 : INFO : WCD\n", + "2018-09-11 22:25:58,575 : INFO : WCD\n", + "2018-09-11 22:25:58,578 : INFO : 0.0\n", + "2018-09-11 22:25:58,583 : INFO : First K WMD\n", + "2018-09-11 22:25:58,582 : INFO : 0.0\n", + "2018-09-11 22:25:58,587 : INFO : First K WMD\n", + "2018-09-11 22:25:58,593 : INFO : [(-17.911226272583008, 9), (-17.565181732177734, 3), (-17.15479850769043, 7), (-15.223217964172363, 5), (-16.388784408569336, 2), (-16.640281677246094, 4), (-14.70413589477539, 8), (-15.15954875946045, 1), (-0.0, 0), (-14.843816757202148, 6)]\n", + "2018-09-11 22:25:58,598 : INFO : 0.0\n", + 
"2018-09-11 22:25:58,599 : INFO : P&P\n", + "2018-09-11 22:25:58,601 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,598 : INFO : [(-19.13587760925293, 9), (-18.659883499145508, 8), (-16.657546997070312, 2), (-16.91031837463379, 6), (-17.777273178100586, 5), (-16.526939392089844, 1), (-10.178200721740723, 0), (-15.90070915222168, 4), (-16.125410079956055, 3), (-16.29979133605957, 7)]\n", + "2018-09-11 22:25:58,611 : INFO : 0.0\n", + "2018-09-11 22:25:58,612 : INFO : P&P\n", + "2018-09-11 22:25:58,613 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,622 : INFO : Vocabulary size: 12 500\n", + "2018-09-11 22:25:58,623 : INFO : WCD\n", + "2018-09-11 22:25:58,625 : INFO : 0.0\n", + "2018-09-11 22:25:58,626 : INFO : First K WMD\n", + "2018-09-11 22:25:58,631 : INFO : Vocabulary size: 11 500\n", + "2018-09-11 22:25:58,632 : INFO : WCD\n", + "2018-09-11 22:25:58,633 : INFO : [(-20.36227035522461, 2), (-20.02027702331543, 6), (-18.461214065551758, 5), (-19.061643600463867, 3), (-19.18093490600586, 7), (-15.513406753540039, 0), (-16.753686904907227, 4), (-17.826271057128906, 1), (-18.72931480407715, 9), (-17.050100326538086, 8)]\n", + "2018-09-11 22:25:58,634 : INFO : 0.0\n", + "2018-09-11 22:25:58,634 : INFO : 0.0\n", + "2018-09-11 22:25:58,635 : INFO : P&P\n", + "2018-09-11 22:25:58,635 : INFO : First K WMD\n", + "2018-09-11 22:25:58,635 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,640 : INFO : [(-22.829483032226562, 8), (-22.363967895507812, 3), (-21.62893295288086, 7), (-21.057315826416016, 4), (-22.267024993896484, 6), (-21.394433975219727, 5), (-18.600770950317383, 0), (-17.37847328186035, 2), (-20.629005432128906, 9), (-16.147872924804688, 1)]\n", + "2018-09-11 22:25:58,641 : INFO : 0.0\n", + "2018-09-11 22:25:58,642 : INFO : P&P\n", + "2018-09-11 22:25:58,643 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,653 : INFO : Vocabulary size: 20 500\n", + "2018-09-11 22:25:58,654 : INFO : WCD\n", + "2018-09-11 22:25:58,656 : INFO : 0.0\n", + "2018-09-11 22:25:58,657 : INFO : First K WMD\n", + "2018-09-11 22:25:58,663 : INFO : Vocabulary size: 7 500\n", + "2018-09-11 22:25:58,664 : INFO : WCD\n", + "2018-09-11 22:25:58,665 : INFO : [(-21.883054733276367, 9), (-20.296064376831055, 1), (-19.330833435058594, 7), (-20.002065658569336, 8), (-19.066402435302734, 5), (-19.277265548706055, 3), (-19.093284606933594, 6), (-0.0, 0), (-17.697256088256836, 2), (-18.806989669799805, 4)]\n", + "2018-09-11 22:25:58,666 : INFO : 0.0\n", + "2018-09-11 22:25:58,666 : INFO : 0.0\n", + "2018-09-11 22:25:58,667 : INFO : P&P\n", + "2018-09-11 22:25:58,667 : INFO : First K WMD\n", + "2018-09-11 22:25:58,668 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,674 : INFO : [(-20.8542537689209, 3), (-19.96109390258789, 6), (-17.577333450317383, 2), (-19.029502868652344, 5), (-16.84893798828125, 8), (-16.881351470947266, 4), (-16.09895133972168, 0), (-17.80130386352539, 7), (-18.95881462097168, 1), (-15.965883255004883, 9)]\n", + "2018-09-11 22:25:58,675 : INFO : 0.0\n", + "2018-09-11 22:25:58,675 : INFO : P&P\n", + "2018-09-11 22:25:58,676 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,686 : INFO : Vocabulary size: 27 500\n", + "2018-09-11 22:25:58,687 : INFO : WCD\n", + "2018-09-11 22:25:58,689 : INFO : 0.0\n", + "2018-09-11 22:25:58,690 : INFO : First K WMD\n", + "2018-09-11 22:25:58,696 : INFO : Vocabulary size: 12 500\n", + "2018-09-11 22:25:58,697 : INFO : WCD\n", + "2018-09-11 22:25:58,699 : INFO : 
[(-22.32929039001465, 8), (-20.41254997253418, 7), (-20.90126609802246, 6), (-20.18366813659668, 1), (-18.876007080078125, 9), (-19.411521911621094, 4), (-18.523984909057617, 5), (-0.0, 0), (-18.146114349365234, 2), (-18.378660202026367, 3)]\n", + "2018-09-11 22:25:58,699 : INFO : 0.0\n", + "2018-09-11 22:25:58,700 : INFO : 0.0\n", + "2018-09-11 22:25:58,700 : INFO : First K WMD\n", + "2018-09-11 22:25:58,701 : INFO : P&P\n", + "2018-09-11 22:25:58,702 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,708 : INFO : [(-21.03609848022461, 3), (-20.929983139038086, 7), (-20.437631607055664, 0), (-20.79033088684082, 9), (-19.649106979370117, 2), (-20.05573272705078, 8), (-20.138891220092773, 4), (-19.18378257751465, 5), (-20.68857765197754, 6), (-18.80322265625, 1)]\n", + "2018-09-11 22:25:58,709 : INFO : 0.0\n", + "2018-09-11 22:25:58,710 : INFO : P&P\n", + "2018-09-11 22:25:58,711 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,719 : INFO : Vocabulary size: 6 500\n", + "2018-09-11 22:25:58,720 : INFO : WCD\n", + "2018-09-11 22:25:58,722 : INFO : 0.0\n", + "2018-09-11 22:25:58,723 : INFO : First K WMD\n", + "2018-09-11 22:25:58,728 : INFO : [(-20.272275924682617, 9), (-18.175186157226562, 1), (-17.054515838623047, 6), (-17.560848236083984, 8), (-17.936189651489258, 7), (-15.665891647338867, 3), (-0.0, 0), (-15.701078414916992, 5), (-16.321266174316406, 4), (-17.918067932128906, 2)]\n", + "2018-09-11 22:25:58,729 : INFO : 0.0\n", + "2018-09-11 22:25:58,729 : INFO : P&P\n", + "2018-09-11 22:25:58,730 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,733 : INFO : Vocabulary size: 15 500\n", + "2018-09-11 22:25:58,734 : INFO : WCD\n", + "2018-09-11 22:25:58,736 : INFO : 0.0\n", + "2018-09-11 22:25:58,737 : INFO : First K WMD\n", + "2018-09-11 22:25:58,746 : INFO : [(-19.55820083618164, 7), (-19.535789489746094, 5), (-17.83282470703125, 9), (-19.4847354888916, 6), (-19.240028381347656, 3), (-16.743432998657227, 0), (-17.031152725219727, 4), (-17.337175369262695, 1), (-17.72780990600586, 2), (-17.97848892211914, 8)]\n", + "2018-09-11 22:25:58,747 : INFO : 0.0\n", + "2018-09-11 22:25:58,748 : INFO : P&P\n", + "2018-09-11 22:25:58,748 : INFO : Vocabulary size: 15 500\n", + "2018-09-11 22:25:58,748 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,749 : INFO : WCD\n", + "2018-09-11 22:25:58,751 : INFO : 0.0\n", + "2018-09-11 22:25:58,752 : INFO : First K WMD\n", + "2018-09-11 22:25:58,759 : INFO : [(-21.612688064575195, 9), (-21.4803409576416, 7), (-20.827802658081055, 3), (-19.619096755981445, 8), (-20.556396484375, 1), (-16.68907928466797, 0), (-18.545000076293945, 4), (-19.338054656982422, 2), (-18.22032356262207, 5), (-18.978164672851562, 6)]\n", + "2018-09-11 22:25:58,759 : INFO : 0.0\n", + "2018-09-11 22:25:58,760 : INFO : P&P\n", + "2018-09-11 22:25:58,761 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,770 : INFO : Vocabulary size: 6 500\n", + "2018-09-11 22:25:58,771 : INFO : WCD\n", + "2018-09-11 22:25:58,773 : INFO : 0.0\n", + "2018-09-11 22:25:58,774 : INFO : First K WMD\n", + "2018-09-11 22:25:58,780 : INFO : [(-21.230680465698242, 8), (-20.992713928222656, 6), (-21.034303665161133, 5), (-20.609220504760742, 1), (-20.88553237915039, 4), (-20.55921173095703, 3), (-20.584562301635742, 9), (-20.077434539794922, 2), (-18.764585494995117, 0), (-19.975536346435547, 7)]\n", + "2018-09-11 22:25:58,781 : INFO : 0.0\n", + "2018-09-11 22:25:58,782 : INFO : P&P\n", + "2018-09-11 22:25:58,782 : INFO : stopped 
by early_stop condition\n", + "2018-09-11 22:25:58,801 : INFO : Vocabulary size: 9 500\n", + "2018-09-11 22:25:58,803 : INFO : WCD\n", + "2018-09-11 22:25:58,805 : INFO : 0.0\n", + "2018-09-11 22:25:58,806 : INFO : First K WMD\n", + "2018-09-11 22:25:58,812 : INFO : [(-18.18400001525879, 8), (-17.58977699279785, 2), (-16.736713409423828, 3), (-17.56550407409668, 1), (-16.782609939575195, 7), (-15.703130722045898, 5), (-16.672775268554688, 4), (-15.72400188446045, 9), (-15.906947135925293, 0), (-16.412216186523438, 6)]\n", + "2018-09-11 22:25:58,813 : INFO : 0.0\n", + "2018-09-11 22:25:58,814 : INFO : P&P\n", + "2018-09-11 22:25:58,815 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,834 : INFO : Vocabulary size: 9 500\n", + "2018-09-11 22:25:58,835 : INFO : WCD\n", + "2018-09-11 22:25:58,837 : INFO : 0.0\n", + "2018-09-11 22:25:58,838 : INFO : First K WMD\n", + "2018-09-11 22:25:58,844 : INFO : [(-20.066879272460938, 9), (-20.004825592041016, 7), (-19.944217681884766, 6), (-19.601634979248047, 2), (-19.930089950561523, 8), (-19.278179168701172, 0), (-18.157686233520508, 4), (-19.476640701293945, 3), (-14.50880241394043, 1), (-19.645950317382812, 5)]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:25:58,845 : INFO : 0.0\n", + "2018-09-11 22:25:58,846 : INFO : P&P\n", + "2018-09-11 22:25:58,847 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,866 : INFO : Vocabulary size: 9 500\n", + "2018-09-11 22:25:58,868 : INFO : WCD\n", + "2018-09-11 22:25:58,870 : INFO : 0.0\n", + "2018-09-11 22:25:58,871 : INFO : First K WMD\n", + "2018-09-11 22:25:58,878 : INFO : [(-19.23527717590332, 3), (-19.186975479125977, 7), (-17.86774253845215, 8), (-18.0268497467041, 1), (-16.744600296020508, 4), (-16.31690788269043, 9), (-17.59482192993164, 6), (-15.280181884765625, 2), (-16.7364559173584, 0), (-13.971843719482422, 5)]\n", + "2018-09-11 22:25:58,879 : INFO : 0.0\n", + "2018-09-11 22:25:58,880 : INFO : P&P\n", + "2018-09-11 22:25:58,880 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,899 : INFO : Vocabulary size: 6 500\n", + "2018-09-11 22:25:58,901 : INFO : WCD\n", + "2018-09-11 22:25:58,903 : INFO : 0.0\n", + "2018-09-11 22:25:58,904 : INFO : First K WMD\n", + "2018-09-11 22:25:58,909 : INFO : [(-23.60029411315918, 9), (-23.017337799072266, 6), (-22.694913864135742, 2), (-22.753923416137695, 4), (-22.664838790893555, 1), (-22.562593460083008, 7), (-22.561864852905273, 5), (-21.107126235961914, 8), (-21.983001708984375, 3), (-19.206193923950195, 0)]\n", + "2018-09-11 22:25:58,910 : INFO : 0.0\n", + "2018-09-11 22:25:58,911 : INFO : P&P\n", + "2018-09-11 22:25:58,912 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,931 : INFO : Vocabulary size: 18 500\n", + "2018-09-11 22:25:58,932 : INFO : WCD\n", + "2018-09-11 22:25:58,934 : INFO : 0.0\n", + "2018-09-11 22:25:58,935 : INFO : First K WMD\n", + "2018-09-11 22:25:58,946 : INFO : [(-17.954301834106445, 8), (-17.734582901000977, 9), (-17.73577308654785, 6), (-16.604555130004883, 0), (-17.155250549316406, 2), (-13.621485710144043, 7), (-15.783629417419434, 1), (-15.569751739501953, 5), (-15.08890438079834, 4), (-16.90961265563965, 3)]\n", + "2018-09-11 22:25:58,948 : INFO : 0.0\n", + "2018-09-11 22:25:58,949 : INFO : P&P\n", + "2018-09-11 22:25:58,950 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:58,980 : INFO : Vocabulary size: 12 500\n", + "2018-09-11 22:25:58,981 : INFO : WCD\n", + "2018-09-11 22:25:58,984 : INFO : 0.0\n", 
+ "2018-09-11 22:25:58,986 : INFO : First K WMD\n", + "2018-09-11 22:25:58,996 : INFO : [(-20.467594146728516, 3), (-20.312692642211914, 6), (-19.303499221801758, 5), (-20.208036422729492, 7), (-19.99427604675293, 8), (-18.846153259277344, 4), (-18.196239471435547, 1), (-17.997549057006836, 0), (-19.58407211303711, 9), (-15.867852210998535, 2)]\n", + "2018-09-11 22:25:59,006 : INFO : 0.0\n", + "2018-09-11 22:25:59,011 : INFO : P&P\n", + "2018-09-11 22:25:59,016 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:59,049 : INFO : Vocabulary size: 15 500\n", + "2018-09-11 22:25:59,051 : INFO : WCD\n", + "2018-09-11 22:25:59,053 : INFO : 0.0\n", + "2018-09-11 22:25:59,054 : INFO : First K WMD\n", + "2018-09-11 22:25:59,061 : INFO : [(-19.664464950561523, 9), (-19.155519485473633, 4), (-18.358680725097656, 5), (-17.876155853271484, 6), (-16.82989501953125, 8), (-17.40734100341797, 7), (-17.210935592651367, 1), (-14.242122650146484, 0), (-12.894062042236328, 3), (-16.33307456970215, 2)]\n", + "2018-09-11 22:25:59,062 : INFO : 0.0\n", + "2018-09-11 22:25:59,063 : INFO : P&P\n", + "2018-09-11 22:25:59,072 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:59,121 : INFO : Vocabulary size: 19 500\n", + "2018-09-11 22:25:59,132 : INFO : WCD\n", + "2018-09-11 22:25:59,138 : INFO : 0.0\n", + "2018-09-11 22:25:59,143 : INFO : First K WMD\n", + "2018-09-11 22:25:59,160 : INFO : [(-21.458436965942383, 9), (-19.76059913635254, 0), (-20.398841857910156, 2), (-19.40574073791504, 5), (-17.732982635498047, 4), (-17.74916648864746, 3), (-15.80296802520752, 6), (-17.70541763305664, 8), (-16.514184951782227, 1), (-16.389368057250977, 7)]\n", + "2018-09-11 22:25:59,165 : INFO : 0.0\n", + "2018-09-11 22:25:59,169 : INFO : P&P\n", + "2018-09-11 22:25:59,169 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:59,189 : INFO : Vocabulary size: 18 500\n", + "2018-09-11 22:25:59,190 : INFO : WCD\n", + "2018-09-11 22:25:59,192 : INFO : 0.0\n", + "2018-09-11 22:25:59,193 : INFO : First K WMD\n", + "2018-09-11 22:25:59,202 : INFO : [(-17.97475242614746, 8), (-15.771330833435059, 6), (-15.66806697845459, 5), (-15.689934730529785, 9), (-15.143582344055176, 2), (-14.668686866760254, 1), (-14.961108207702637, 4), (-15.054632186889648, 3), (-15.20167064666748, 7), (-14.523492813110352, 0)]\n", + "2018-09-11 22:25:59,203 : INFO : 0.0\n", + "2018-09-11 22:25:59,204 : INFO : P&P\n", + "2018-09-11 22:25:59,205 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:59,225 : INFO : Vocabulary size: 18 500\n", + "2018-09-11 22:25:59,226 : INFO : WCD\n", + "2018-09-11 22:25:59,228 : INFO : 0.0\n", + "2018-09-11 22:25:59,229 : INFO : First K WMD\n", + "2018-09-11 22:25:59,239 : INFO : [(-20.535655975341797, 3), (-19.996644973754883, 7), (-19.559776306152344, 8), (-19.375608444213867, 4), (-19.035423278808594, 9), (-17.55828285217285, 5), (-16.864097595214844, 2), (-17.597759246826172, 6), (-18.404525756835938, 0), (-18.959619522094727, 1)]\n", + "2018-09-11 22:25:59,240 : INFO : 0.0\n", + "2018-09-11 22:25:59,241 : INFO : P&P\n", + "2018-09-11 22:25:59,242 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:59,261 : INFO : Vocabulary size: 13 500\n", + "2018-09-11 22:25:59,262 : INFO : WCD\n", + "2018-09-11 22:25:59,264 : INFO : 0.0\n", + "2018-09-11 22:25:59,265 : INFO : First K WMD\n", + "2018-09-11 22:25:59,272 : INFO : [(-21.771106719970703, 4), (-21.247968673706055, 2), (-20.598562240600586, 0), (-20.135677337646484, 3), (-20.833316802978516, 5), (-20.54116439819336, 8), 
(-17.429851531982422, 1), (-18.592809677124023, 6), (-18.8632755279541, 7), (-19.952924728393555, 9)]\n", + "2018-09-11 22:25:59,273 : INFO : 0.0\n", + "2018-09-11 22:25:59,274 : INFO : P&P\n", + "2018-09-11 22:25:59,275 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:59,293 : INFO : Vocabulary size: 12 500\n", + "2018-09-11 22:25:59,294 : INFO : WCD\n", + "2018-09-11 22:25:59,296 : INFO : 0.0\n", + "2018-09-11 22:25:59,297 : INFO : First K WMD\n", + "2018-09-11 22:25:59,304 : INFO : [(-19.042404174804688, 8), (-17.802324295043945, 3), (-17.451053619384766, 2), (-17.502456665039062, 9), (-17.570905685424805, 0), (-15.920186996459961, 1), (-17.35521125793457, 5), (-15.297701835632324, 4), (-14.293990135192871, 6), (-16.688701629638672, 7)]\n", + "2018-09-11 22:25:59,305 : INFO : 0.0\n", + "2018-09-11 22:25:59,306 : INFO : P&P\n", + "2018-09-11 22:25:59,307 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:59,324 : INFO : Vocabulary size: 6 500\n", + "2018-09-11 22:25:59,325 : INFO : WCD\n", + "2018-09-11 22:25:59,327 : INFO : 0.0\n", + "2018-09-11 22:25:59,328 : INFO : First K WMD\n", + "2018-09-11 22:25:59,333 : INFO : [(-20.039142608642578, 9), (-19.704204559326172, 6), (-19.04420280456543, 7), (-19.313358306884766, 3), (-19.128257751464844, 8), (-18.67458724975586, 1), (-18.578676223754883, 5), (-17.6994571685791, 2), (-19.070281982421875, 0), (-18.370285034179688, 4)]\n", + "2018-09-11 22:25:59,333 : INFO : 0.0\n", + "2018-09-11 22:25:59,334 : INFO : P&P\n", + "2018-09-11 22:25:59,335 : INFO : stopped by early_stop condition\n", + "2018-09-11 22:25:59,353 : INFO : Vocabulary size: 14 500\n", + "2018-09-11 22:25:59,355 : INFO : WCD\n", + "2018-09-11 22:25:59,357 : INFO : 0.0\n", + "2018-09-11 22:25:59,358 : INFO : First K WMD\n", + "2018-09-11 22:25:59,364 : INFO : [(-19.95884895324707, 7), (-19.88328742980957, 4), (-19.149295806884766, 9), (-18.12515640258789, 3), (-18.278722763061523, 0), (-17.01123046875, 1), (-17.777706146240234, 6), (-17.219406127929688, 8), (-17.94599723815918, 2), (-17.71858024597168, 5)]\n", + "2018-09-11 22:25:59,365 : INFO : 0.0\n", + "2018-09-11 22:25:59,365 : INFO : P&P\n", + "2018-09-11 22:25:59,366 : INFO : stopped by early_stop condition\n", + "/mnt/storage/home/novotny/.virtualenvs/gensim/lib/python3.4/site-packages/gensim/similarities/termsim.py:357: RuntimeWarning: divide by zero encountered in true_divide\n", + " Y = np.multiply(Y, 1 / np.sqrt(Y_norm))\n", + "/mnt/storage/home/novotny/.virtualenvs/gensim/lib/python3.4/site-packages/gensim/similarities/termsim.py:357: RuntimeWarning: invalid value encountered in multiply\n", + " Y = np.multiply(Y, 1 / np.sqrt(Y_norm))\n", + "2018-09-11 22:26:00,359 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:00,361 : INFO : built Dictionary(46 unique tokens: ['pls', 'need', 'rp', 'please', 'agreement']...) from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:26:00,381 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:00,382 : INFO : built Dictionary(34 unique tokens: ['get', 'anybody', 'trying', 'gone', 'month']...) from 2 documents (total 45 corpus positions)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:26:00,395 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:00,397 : INFO : built Dictionary(40 unique tokens: ['wife', 'counter', 'trying', 'submission', 'rent']...) 
from 2 documents (total 52 corpus positions)\n", + "2018-09-11 22:26:00,413 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:00,415 : INFO : built Dictionary(41 unique tokens: ['reply', 'approve', 'review', 'showing', 'trying']...) from 2 documents (total 53 corpus positions)\n", + "2018-09-11 22:26:00,432 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:00,433 : INFO : built Dictionary(34 unique tokens: ['ask', 'wife', 'get', 'civil', 'profession']...) from 2 documents (total 56 corpus positions)\n", + "2018-09-11 22:26:00,446 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:00,447 : INFO : built Dictionary(37 unique tokens: ['wife', 'qatar', 'civil', 'trying', 'even']...) from 2 documents (total 54 corpus positions)\n", + "2018-09-11 22:26:00,463 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:00,465 : INFO : built Dictionary(43 unique tokens: ['goods', 'countries', 'talking', 'could', 'even']...) from 2 documents (total 52 corpus positions)\n", + "2018-09-11 22:26:00,467 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:00,468 : INFO : built Dictionary(25 unique tokens: ['get', 'schedule', 'meet', 'month', 'one']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:26:00,480 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:00,481 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:00,482 : INFO : built Dictionary(35 unique tokens: ['difference', 'said', 'qatar', 'pls', 'thanks']...) from 2 documents (total 56 corpus positions)\n", + "2018-09-11 22:26:00,494 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:00,496 : INFO : built Dictionary(22 unique tokens: ['goods', 'marriage', 'could', 'long', 'small']...) from 2 documents (total 54 corpus positions)\n", + "2018-09-11 22:26:00,503 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:00,504 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:00,505 : INFO : built Dictionary(51 unique tokens: ['civil', 'rent', 'rp', 'please', 'show']...) from 2 documents (total 72 corpus positions)\n", + "2018-09-11 22:26:00,518 : INFO : Removed 2 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:00,527 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:00,529 : INFO : built Dictionary(33 unique tokens: ['goods', 'small', 'ok', 'could', 'long']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:26:00,545 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:00,537 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:00,547 : INFO : built Dictionary(28 unique tokens: ['goods', 'provide', 'fare', 'long', 'marriage']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:26:00,547 : INFO : built Dictionary(27 unique tokens: ['wife', 'get', 'rent', 'format', 'possible']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:26:00,556 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:00,557 : INFO : built Dictionary(38 unique tokens: ['confusion', 'fourth', 'goods', 'marriage', 'third']...) 
from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:26:00,561 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:00,572 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:00,573 : INFO : built Dictionary(31 unique tokens: ['goods', 'beautiful', 'could', 'long', 'marriage']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:26:00,582 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:00,583 : INFO : built Dictionary(36 unique tokens: ['goods', 'night', 'closed', 'could', 'long']...) from 2 documents (total 43 corpus positions)\n", + "2018-09-11 22:26:00,596 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:00,597 : INFO : built Dictionary(32 unique tokens: ['accepted', 'living', 'small', 'could', 'long']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:26:00,607 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:00,608 : INFO : built Dictionary(30 unique tokens: ['goods', 'small', 'woman', 'could', 'comments']...) from 2 documents (total 35 corpus positions)\n", + "2018-09-11 22:26:00,615 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:00,616 : INFO : built Dictionary(49 unique tokens: ['goods', 'countries', 'care', 'expat', 'qatar']...) from 2 documents (total 57 corpus positions)\n", + "2018-09-11 22:26:00,639 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:02,630 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:02,632 : INFO : built Dictionary(39 unique tokens: ['maid', 'qatar', 'fire', 'sponsoring', 'breaking']...) from 2 documents (total 45 corpus positions)\n", + "2018-09-11 22:26:02,645 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:02,646 : INFO : built Dictionary(41 unique tokens: ['worked', 'current', 'qatar', 'said', 'thanks']...) from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:26:02,660 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:02,661 : INFO : built Dictionary(17 unique tokens: ['give', 'qatar', 'passport', 'release', 'even']...) from 2 documents (total 25 corpus positions)\n", + "2018-09-11 22:26:02,667 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:02,671 : INFO : built Dictionary(38 unique tokens: ['wife', 'maid', 'qatar', 'would', 'release']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:26:02,692 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:02,696 : INFO : built Dictionary(24 unique tokens: ['maid', 'qatar', 'ethiopia', 'thanks', 'cancel']...) from 2 documents (total 33 corpus positions)\n", + "2018-09-11 22:26:02,708 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:02,716 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:02,717 : INFO : built Dictionary(35 unique tokens: ['get', 'yr', 'thanks', 'coz', 'qatari']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:26:02,738 : INFO : built Dictionary(51 unique tokens: ['group', 'ticket', 'better', 'decent', 'qar']...) 
from 2 documents (total 66 corpus positions)\n", + "2018-09-11 22:26:02,738 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:02,753 : INFO : built Dictionary(34 unique tokens: ['give', 'get', 'release', 'could', 'cancel']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:26:02,769 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:02,771 : INFO : built Dictionary(45 unique tokens: ['work', 'raise', 'release', 'soon', 'accommodation']...) from 2 documents (total 57 corpus positions)\n", + "2018-09-11 22:26:02,784 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:02,786 : INFO : built Dictionary(35 unique tokens: ['exp', 'living', 'air', 'ticket', 'insurance']...) from 2 documents (total 80 corpus positions)\n", + "2018-09-11 22:26:02,788 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:02,790 : INFO : built Dictionary(40 unique tokens: ['wife', 'maid', 'qatar', 'release', 'lives']...) from 2 documents (total 54 corpus positions)\n", + "2018-09-11 22:26:02,803 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:02,804 : INFO : built Dictionary(30 unique tokens: ['plz', 'alternative', 'qatar', 'release', 'whether']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:26:02,814 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:02,825 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:02,826 : INFO : built Dictionary(41 unique tokens: ['exp', 'industries', 'air', 'ticket', 'insurance']...) from 2 documents (total 54 corpus positions)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:26:02,842 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:02,843 : INFO : built Dictionary(63 unique tokens: ['cover', 'air', 'ticket', 'tell', 'decent']...) from 2 documents (total 82 corpus positions)\n", + "2018-09-11 22:26:02,890 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:02,892 : INFO : built Dictionary(61 unique tokens: ['pharma', 'air', 'ticket', 'manager', 'years']...) from 2 documents (total 76 corpus positions)\n", + "2018-09-11 22:26:02,938 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:02,940 : INFO : built Dictionary(58 unique tokens: ['air', 'say', 'travel', 'qar', 'ticket']...) from 2 documents (total 82 corpus positions)\n", + "2018-09-11 22:26:02,997 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:03,007 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:03,009 : INFO : built Dictionary(44 unique tokens: ['air', 'decent', 'qar', 'ticket', 'india']...) from 2 documents (total 68 corpus positions)\n", + "2018-09-11 22:26:03,050 : INFO : Removed 2 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:03,052 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:03,054 : INFO : built Dictionary(51 unique tokens: ['air', 'corporate', 'abt', 'decent', 'qar']...) from 2 documents (total 70 corpus positions)\n", + "2018-09-11 22:26:03,088 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:03,090 : INFO : built Dictionary(45 unique tokens: ['work', 'air', 'ok', 'exp', 'decent']...) 
from 2 documents (total 57 corpus positions)\n", + "2018-09-11 22:26:03,108 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:03,109 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:03,110 : INFO : built Dictionary(50 unique tokens: ['open', 'air', 'ticket', 'roughly', 'qar']...) from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:26:03,135 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:04,898 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:04,908 : INFO : built Dictionary(33 unique tokens: ['resign', 'current', 'qatar', 'wife', 'receiving']...) from 2 documents (total 46 corpus positions)\n", + "2018-09-11 22:26:04,923 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:04,925 : INFO : built Dictionary(36 unique tokens: ['wife', 'current', 'qatar', 'switch', 'release']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:26:04,939 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:04,941 : INFO : built Dictionary(15 unique tokens: ['wife', 'work', 'qatar', 'steps', 'sponsorship']...) from 2 documents (total 29 corpus positions)\n", + "2018-09-11 22:26:04,956 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:04,958 : INFO : built Dictionary(16 unique tokens: ['work', 'three', 'company', 'thanks', 'even']...) from 2 documents (total 23 corpus positions)\n", + "2018-09-11 22:26:04,965 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:04,966 : INFO : built Dictionary(18 unique tokens: ['permit', 'get', 'work', 'long', 'sponsorship']...) from 2 documents (total 26 corpus positions)\n", + "2018-09-11 22:26:04,973 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:04,975 : INFO : built Dictionary(24 unique tokens: ['wife', 'man', 'qatar', 'know', 'thanks']...) from 2 documents (total 33 corpus positions)\n", + "2018-09-11 22:26:04,986 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:04,990 : INFO : built Dictionary(25 unique tokens: ['fathers', 'maid', 'qatar', 'changing', 'please']...) from 2 documents (total 33 corpus positions)\n", + "2018-09-11 22:26:05,001 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:05,005 : INFO : built Dictionary(29 unique tokens: ['getting', 'get', 'permit', 'son', 'qatar']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:26:05,018 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:05,022 : INFO : built Dictionary(29 unique tokens: ['wife', 'work', 'qatar', 'weeks', 'thanks']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:26:05,036 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:05,039 : INFO : built Dictionary(30 unique tokens: ['qatar', 'received', 'would', 'work', 'transfer']...) from 2 documents (total 38 corpus positions)\n", + "2018-09-11 22:26:05,053 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:05,228 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:05,230 : INFO : built Dictionary(31 unique tokens: ['past', 'even', 'true', 'yet', 'car']...) 
from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:26:05,241 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:05,242 : INFO : built Dictionary(36 unique tokens: ['ask', 'pay', 'light', 'back', 'past']...) from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:26:05,255 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:05,256 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:05,257 : INFO : built Dictionary(35 unique tokens: ['reduce', 'pls', 'signal', 'month', 'traffic']...) from 2 documents (total 43 corpus positions)\n", + "2018-09-11 22:26:05,268 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:05,270 : INFO : built Dictionary(23 unique tokens: ['friend', 'thanks', 'members', 'yet', 'car']...) from 2 documents (total 29 corpus positions)\n", + "2018-09-11 22:26:05,276 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:05,277 : INFO : built Dictionary(15 unique tokens: ['know', 'realized', 'thanks', 'penalty', 'red']...) from 2 documents (total 24 corpus positions)\n", + "2018-09-11 22:26:05,280 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:05,281 : INFO : built Dictionary(41 unique tokens: ['mom', 'drive', 'signal', 'jumping', 'going']...) from 2 documents (total 56 corpus positions)\n", + "2018-09-11 22:26:05,296 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:05,298 : INFO : built Dictionary(47 unique tokens: ['pls', 'signal', 'second', 'forward', 'realized']...) from 2 documents (total 56 corpus positions)\n", + "2018-09-11 22:26:05,315 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:05,317 : INFO : built Dictionary(35 unique tokens: ['know', 'god', 'hope', 'penalty', 'fined']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:26:05,329 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:05,330 : INFO : built Dictionary(34 unique tokens: ['received', 'camera', 'know', 'wait', 'jumping']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:26:05,342 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:05,343 : INFO : built Dictionary(44 unique tokens: ['qar', 'ur', 'average', 'months', 'u']...) from 2 documents (total 60 corpus positions)\n", + "2018-09-11 22:26:05,359 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:07,096 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:07,098 : INFO : built Dictionary(38 unique tokens: ['nice', 'within', 'would', 'area', 'work']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:26:07,113 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:07,114 : INFO : built Dictionary(37 unique tokens: ['shop', 'would', 'area', 'thanks', 'within']...) from 2 documents (total 47 corpus positions)\n", + "2018-09-11 22:26:07,128 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:07,129 : INFO : built Dictionary(43 unique tokens: ['shop', 'within', 'would', 'area', 'thanks']...) 
from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:26:07,148 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:26:07,150 : INFO : built Dictionary(33 unique tokens: ['shop', 'within', 'knows', 'area', 'sew']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:26:07,159 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:07,160 : INFO : built Dictionary(42 unique tokens: ['shop', 'get', 'would', 'area', 'selling']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:26:07,180 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:07,181 : INFO : built Dictionary(53 unique tokens: ['shop', 'would', 'either', 'selection', 'trousers']...) from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:26:07,208 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:07,209 : INFO : built Dictionary(46 unique tokens: ['work', 'would', 'garage', 'trousers', 'upcoming']...) from 2 documents (total 53 corpus positions)\n", + "2018-09-11 22:26:07,231 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:07,232 : INFO : built Dictionary(38 unique tokens: ['shop', 'within', 'would', 'area', 'sew']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:26:07,246 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:07,247 : INFO : built Dictionary(40 unique tokens: ['shop', 'within', 'would', 'bombay', 'sew']...) from 2 documents (total 45 corpus positions)\n", + "2018-09-11 22:26:07,261 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:07,263 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:07,263 : INFO : built Dictionary(61 unique tokens: ['shop', 'would', 'need', 'within', 'traditional']...) from 2 documents (total 71 corpus positions)\n", + "2018-09-11 22:26:07,300 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:07,332 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:07,334 : INFO : built Dictionary(6 unique tokens: ['body', 'advice', 'gold', 'loan', 'qatar']...) from 2 documents (total 18 corpus positions)\n", + "2018-09-11 22:26:07,336 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:07,337 : INFO : built Dictionary(25 unique tokens: ['get', 'qatar', 'cash', 'one', 'options']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:26:07,343 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:07,344 : INFO : built Dictionary(17 unique tokens: ['bank', 'get', 'qatar', 'wise', 'gold']...) from 2 documents (total 26 corpus positions)\n", + "2018-09-11 22:26:07,348 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:07,349 : INFO : built Dictionary(35 unique tokens: ['best', 'qatar', 'get', 'deposit', 'need']...) from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:26:07,357 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:07,358 : INFO : built Dictionary(37 unique tokens: ['qatar', 'body', 'provide', 'discussed', 'real']...) 
from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:26:07,368 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:07,369 : INFO : built Dictionary(32 unique tokens: ['friend', 'qatar', 'body', 'crazy', 'right']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:26:07,377 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:07,377 : INFO : built Dictionary(33 unique tokens: ['getting', 'qatar', 'actually', 'certify', 'muslims']...) from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:26:07,386 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:07,387 : INFO : built Dictionary(9 unique tokens: ['qatar', 'body', 'gold', 'advice', 'best']...) from 2 documents (total 12 corpus positions)\n", + "2018-09-11 22:26:07,388 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:07,389 : INFO : built Dictionary(17 unique tokens: ['qatar', 'body', 'qatarized', 'qatarization', 'must']...) from 2 documents (total 20 corpus positions)\n", + "2018-09-11 22:26:07,393 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:07,394 : INFO : built Dictionary(33 unique tokens: ['friend', 'qatar', 'body', 'restaurants', 'threads']...) from 2 documents (total 37 corpus positions)\n", + "2018-09-11 22:26:07,402 : INFO : precomputing L2-norms of word weight vectors\n", + "/mnt/storage/home/novotny/.virtualenvs/gensim/lib/python3.4/site-packages/gensim/similarities/termsim.py:357: RuntimeWarning: divide by zero encountered in true_divide\n", + " Y = np.multiply(Y, 1 / np.sqrt(Y_norm))\n", + "/mnt/storage/home/novotny/.virtualenvs/gensim/lib/python3.4/site-packages/gensim/similarities/termsim.py:357: RuntimeWarning: invalid value encountered in multiply\n", + " Y = np.multiply(Y, 1 / np.sqrt(Y_norm))\n", + "2018-09-11 22:26:09,309 : INFO : Removed 0 and 3 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:09,310 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:09,311 : INFO : built Dictionary(33 unique tokens: ['ine', 'valuable', 'doc', 'better', 'medicine']...) from 2 documents (total 37 corpus positions)\n", + "2018-09-11 22:26:09,321 : INFO : Removed 0 and 3 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:09,322 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:09,323 : INFO : built Dictionary(62 unique tokens: ['wil', 'better', 'advertisement', 'exam', 'jaida']...) from 2 documents (total 71 corpus positions)\n", + "2018-09-11 22:26:09,355 : INFO : Removed 0 and 3 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:09,357 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:09,358 : INFO : built Dictionary(38 unique tokens: ['module', 'ine', 'pls', 'study', 'thanks']...) from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:26:09,372 : INFO : Removed 0 and 3 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:09,373 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:09,374 : INFO : built Dictionary(51 unique tokens: ['april', 'better', 'write', 'evening', 'day']...) 
from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:26:09,399 : INFO : Removed 0 and 3 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:09,400 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:09,401 : INFO : built Dictionary(51 unique tokens: ['hw', 'salaries', 'reply', 'accommodation', 'ur']...) from 2 documents (total 65 corpus positions)\n", + "2018-09-11 22:26:09,424 : INFO : Removed 0 and 3 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:09,425 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:09,426 : INFO : built Dictionary(61 unique tokens: ['projects', 'starting', 'economy', 'exam', 'east']...) from 2 documents (total 69 corpus positions)\n", + "2018-09-11 22:26:09,428 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:09,429 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:09,430 : INFO : built Dictionary(10 unique tokens: ['first', 'mine', 'job', 'manila', 'ever']...) from 2 documents (total 12 corpus positions)\n", + "2018-09-11 22:26:09,432 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:09,433 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:09,434 : INFO : built Dictionary(18 unique tokens: ['ever', 'mine', 'brand', 'interiors', 'jeep']...) from 2 documents (total 21 corpus positions)\n", + "2018-09-11 22:26:09,437 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:09,438 : INFO : built Dictionary(9 unique tokens: ['first', 'ever', 'anyone', 'use', 'job']...) from 2 documents (total 9 corpus positions)\n", + "2018-09-11 22:26:09,440 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:09,441 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:09,441 : INFO : built Dictionary(21 unique tokens: ['hear', 'ever', 'c', 'golliwog', 'p']...) from 2 documents (total 24 corpus positions)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:26:09,445 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:09,446 : INFO : built Dictionary(27 unique tokens: ['rewarding', 'countries', 'hope', 'started', 'label']...) from 2 documents (total 31 corpus positions)\n", + "2018-09-11 22:26:09,451 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:09,451 : INFO : built Dictionary(22 unique tokens: ['picture', 'idolize', 'even', 'small', 'one']...) from 2 documents (total 24 corpus positions)\n", + "2018-09-11 22:26:09,455 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:09,456 : INFO : built Dictionary(46 unique tokens: ['actually', 'shall', 'say', 'day', 'good']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:26:09,459 : INFO : Removed 0 and 3 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:09,460 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:09,461 : INFO : built Dictionary(32 unique tokens: ['ine', 'group', 'study', 'max', 'preferrably']...) 
from 2 documents (total 38 corpus positions)\n", + "2018-09-11 22:26:09,465 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:09,466 : INFO : built Dictionary(10 unique tokens: ['adventures', 'interesting', 'get', 'ever', 'job']...) from 2 documents (total 10 corpus positions)\n", + "2018-09-11 22:26:09,468 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:09,468 : INFO : built Dictionary(37 unique tokens: ['activities', 'people', 'morning', 'p', 'meet']...) from 2 documents (total 43 corpus positions)\n", + "2018-09-11 22:26:09,470 : INFO : Removed 0 and 3 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:09,470 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:09,471 : INFO : built Dictionary(46 unique tokens: ['ine', 'exam', 'wear', 'qatar', 'ur']...) from 2 documents (total 52 corpus positions)\n", + "2018-09-11 22:26:09,475 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:09,493 : INFO : built Dictionary(12 unique tokens: ['answer', 'c', 'know', 'please', 'good']...) from 2 documents (total 13 corpus positions)\n", + "2018-09-11 22:26:09,500 : INFO : Removed 1 and 3 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:09,504 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:09,505 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:09,508 : INFO : built Dictionary(55 unique tokens: ['accent', 'days', 'parking', 'better', 'strangers']...) from 2 documents (total 67 corpus positions)\n", + "2018-09-11 22:26:09,552 : INFO : Removed 3 and 3 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:09,565 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:09,566 : INFO : built Dictionary(37 unique tokens: ['give', 'ine', 'know', 'better', 'recently']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:26:09,587 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:11,538 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:11,539 : INFO : built Dictionary(43 unique tokens: ['isnt', 'renting', 'suggestions', 'ive', 'cash']...) from 2 documents (total 58 corpus positions)\n", + "2018-09-11 22:26:11,556 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:11,558 : INFO : built Dictionary(35 unique tokens: ['step', 'apartments', 'landlords', 'sublet', 'thank']...) from 2 documents (total 42 corpus positions)\n", + "2018-09-11 22:26:11,560 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:11,561 : INFO : built Dictionary(34 unique tokens: ['qatar', 'following', 'experienced', 'fare', 'nagpur']...) from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:26:11,578 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:11,579 : INFO : built Dictionary(37 unique tokens: ['ranges', 'qatar', 'employment', 'service', 'experienced']...) from 2 documents (total 47 corpus positions)\n", + "2018-09-11 22:26:11,574 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:11,586 : INFO : built Dictionary(41 unique tokens: ['picture', 'taken', 'people', 'qa', 'thanks']...) 
from 2 documents (total 53 corpus positions)\n", + "2018-09-11 22:26:11,599 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:11,607 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:11,609 : INFO : built Dictionary(34 unique tokens: ['qatar', 'pls', 'experienced', 'could', 'travel']...) from 2 documents (total 45 corpus positions)\n", + "2018-09-11 22:26:11,609 : INFO : built Dictionary(42 unique tokens: ['flat', 'area', 'experienced', 'recently', 'appartments']...) from 2 documents (total 52 corpus positions)\n", + "2018-09-11 22:26:11,632 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:11,626 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:11,633 : INFO : built Dictionary(43 unique tokens: ['zag', 'qatar', 'towers', 'sublet', 'either']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:26:11,634 : INFO : built Dictionary(44 unique tokens: ['ask', 'euros', 'excess', 'maybe', 'body']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:26:11,657 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:11,666 : INFO : built Dictionary(39 unique tokens: ['qatar', 'leaves', 'back', 'sublet', 'situation']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:26:11,675 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:11,676 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:11,677 : INFO : built Dictionary(38 unique tokens: ['getting', 'qatar', 'interviews', 'experienced', 'recently']...) from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:26:11,684 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:11,685 : INFO : built Dictionary(42 unique tokens: ['promised', 'recently', 'release', 'sublet', 'liabilities']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:26:11,706 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:11,708 : INFO : built Dictionary(41 unique tokens: ['ask', 'flat', 'interviews', 'nurse', 'sublet']...) from 2 documents (total 53 corpus positions)\n", + "2018-09-11 22:26:11,693 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:11,710 : INFO : built Dictionary(46 unique tokens: ['experienced', 'rules', 'turkey', 'received', 'service']...) from 2 documents (total 56 corpus positions)\n", + "2018-09-11 22:26:11,730 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:11,740 : INFO : built Dictionary(43 unique tokens: ['best', 'get', 'deposit', 'need', 'thanks']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:26:11,744 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:11,746 : INFO : built Dictionary(31 unique tokens: ['said', 'qatar', 'air', 'better', 'experienced']...) from 2 documents (total 45 corpus positions)\n", + "2018-09-11 22:26:11,762 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:11,767 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:11,764 : INFO : built Dictionary(34 unique tokens: ['work', 'qatar', 'direct', 'experienced', 'could']...) 
from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:26:11,787 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:11,769 : INFO : built Dictionary(37 unique tokens: ['work', 'problem', 'suggestions', 'cash', 'pay']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:26:11,794 : INFO : built Dictionary(46 unique tokens: ['allowance', 'experienced', 'nagpur', 'body', 'good']...) from 2 documents (total 63 corpus positions)\n", + "2018-09-11 22:26:11,803 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:11,813 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:26:11,815 : INFO : built Dictionary(27 unique tokens: ['ask', 'qatar', 'experienced', 'could', 'nagpur']...) from 2 documents (total 39 corpus positions)\n", + "2018-09-11 22:26:11,824 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:13,831 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:13,832 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:13,833 : INFO : built Dictionary(18 unique tokens: ['please', 'near', 'thanks', 'duhail', 'house']...) from 2 documents (total 22 corpus positions)\n", + "2018-09-11 22:26:13,837 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:13,838 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:13,839 : INFO : built Dictionary(39 unique tokens: ['friend', 'problem', 'slender', 'woman', 'seem']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:26:13,841 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:13,842 : INFO : built Dictionary(23 unique tokens: ['determine', 'one', 'car', 'months', 'doha']...) from 2 documents (total 58 corpus positions)\n", + "2018-09-11 22:26:13,855 : INFO : Removed 1 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:13,856 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:13,857 : INFO : built Dictionary(50 unique tokens: ['accent', 'parking', 'strangers', 'please', 'favor']...) from 2 documents (total 60 corpus positions)\n", + "2018-09-11 22:26:13,859 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:13,860 : INFO : built Dictionary(49 unique tokens: ['determine', 'qatari', 'months', 'best', 'specially']...) from 2 documents (total 66 corpus positions)\n", + "2018-09-11 22:26:13,882 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:13,891 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:13,893 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:13,895 : INFO : built Dictionary(43 unique tokens: ['south', 'get', 'duhail', 'thanks', 'hello']...) from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:26:13,894 : INFO : built Dictionary(44 unique tokens: ['reasonable', 'determine', 'care', 'married', 'months']...) 
from 2 documents (total 57 corpus positions)\n", + "2018-09-11 22:26:13,920 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:13,930 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:13,932 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:13,934 : INFO : built Dictionary(30 unique tokens: ['qr', 'thanks', 'could', 'cats', 'pet']...) from 2 documents (total 35 corpus positions)\n", + "2018-09-11 22:26:13,935 : INFO : built Dictionary(35 unique tokens: ['received', 'determine', 'truly', 'soon', 'qar']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:26:13,949 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:13,970 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:13,966 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:13,974 : INFO : built Dictionary(46 unique tokens: ['especially', 'ok', 'polite', 'please', 'house']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:26:13,974 : INFO : built Dictionary(45 unique tokens: ['work', 'determine', 'qatar', 'force', 'months']...) from 2 documents (total 58 corpus positions)\n", + "2018-09-11 22:26:13,993 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:13,994 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:14,003 : INFO : built Dictionary(57 unique tokens: ['ask', 'duhail', 'constantly', 'day', 'lost']...) from 2 documents (total 68 corpus positions)\n", + "2018-09-11 22:26:13,995 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:14,014 : INFO : built Dictionary(58 unique tokens: ['work', 'round', 'gratuity', 'determine', 'never']...) from 2 documents (total 78 corpus positions)\n", + "2018-09-11 22:26:14,044 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:14,049 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:14,055 : INFO : built Dictionary(41 unique tokens: ['wanna', 'problem', 'ok', 'p', 'started']...) from 2 documents (total 47 corpus positions)\n", + "2018-09-11 22:26:14,064 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:14,066 : INFO : built Dictionary(50 unique tokens: ['ask', 'need', 'nurse', 'attend', 'clinics']...) from 2 documents (total 67 corpus positions)\n", + "2018-09-11 22:26:14,086 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:14,092 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:14,093 : INFO : built Dictionary(43 unique tokens: ['charges', 'actually', 'clean', 'beaches', 'daughter']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:26:14,107 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:14,109 : INFO : built Dictionary(54 unique tokens: ['determine', 'buying', 'qar', 'ur', 'average']...) from 2 documents (total 75 corpus positions)\n", + "2018-09-11 22:26:14,115 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:14,116 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:14,117 : INFO : built Dictionary(46 unique tokens: ['aside', 'hub', 'beach', 'please', 'good']...) 
from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:26:14,136 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:14,138 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:14,140 : INFO : built Dictionary(42 unique tokens: ['determine', 'wage', 'per', 'government', 'month']...) from 2 documents (total 52 corpus positions)\n", + "2018-09-11 22:26:14,157 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:14,158 : INFO : built Dictionary(41 unique tokens: ['qatar', 'determine', 'n', 'truly', 'might']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:26:14,175 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:16,087 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:16,096 : INFO : built Dictionary(32 unique tokens: ['parcel', 'travelling', 'else', 'might', 'also']...) from 2 documents (total 46 corpus positions)\n", + "2018-09-11 22:26:16,115 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:16,120 : INFO : built Dictionary(47 unique tokens: ['granted', 'small', 'present', 'please', 'host']...) from 2 documents (total 67 corpus positions)\n", + "2018-09-11 22:26:16,158 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:16,159 : INFO : built Dictionary(32 unique tokens: ['permits', 'permit', 'parcel', 'means', 'â']...) from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:26:16,171 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:16,172 : INFO : built Dictionary(20 unique tokens: ['post', 'qatar', 'know', 'small', 'travelling']...) from 2 documents (total 29 corpus positions)\n", + "2018-09-11 22:26:16,176 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:16,177 : INFO : built Dictionary(48 unique tokens: ['deposit', 'sheikh', 'vacation', 'travel', 'good']...) from 2 documents (total 64 corpus positions)\n", + "2018-09-11 22:26:16,191 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:16,192 : INFO : built Dictionary(29 unique tokens: ['projects', 'received', 'usd', 'welding', 'fare']...) from 2 documents (total 72 corpus positions)\n", + "2018-09-11 22:26:16,196 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:16,197 : INFO : built Dictionary(23 unique tokens: ['said', 'qatar', 'uae', 'travelling', 'small']...) from 2 documents (total 33 corpus positions)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:26:16,203 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:16,204 : INFO : built Dictionary(38 unique tokens: ['work', 'problem', 'parcel', 'travelling', 'pay']...) from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:26:16,217 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:16,218 : INFO : built Dictionary(31 unique tokens: ['permit', 'drive', 'parcel', 'residence', 'rp']...) from 2 documents (total 46 corpus positions)\n", + "2018-09-11 22:26:16,218 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:16,219 : INFO : built Dictionary(46 unique tokens: ['projects', 'usd', 'work', 'participated', 'please']...) 
from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:26:16,229 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:16,230 : INFO : built Dictionary(50 unique tokens: ['info', 'said', 'renewal', 'need', 'valid']...) from 2 documents (total 62 corpus positions)\n", + "2018-09-11 22:26:16,243 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:16,245 : INFO : built Dictionary(43 unique tokens: ['received', 'projects', 'qatar', 'usd', 'welding']...) from 2 documents (total 60 corpus positions)\n", + "2018-09-11 22:26:16,250 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:16,251 : INFO : built Dictionary(24 unique tokens: ['parcel', 'travelling', 'thanks', 'say', 'small']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:26:16,258 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:16,265 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:16,266 : INFO : built Dictionary(40 unique tokens: ['received', 'projects', 'qatar', 'usd', 'welding']...) from 2 documents (total 53 corpus positions)\n", + "2018-09-11 22:26:16,284 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:16,286 : INFO : built Dictionary(45 unique tokens: ['projects', 'fare', 'benefits', 'participated', 'qatar']...) from 2 documents (total 60 corpus positions)\n", + "2018-09-11 22:26:16,319 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:16,321 : INFO : built Dictionary(54 unique tokens: ['projects', 'usd', 'changing', 'manager', 'agent']...) from 2 documents (total 70 corpus positions)\n", + "2018-09-11 22:26:16,361 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:16,363 : INFO : built Dictionary(34 unique tokens: ['projects', 'received', 'usd', 'welding', 'fare']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:26:16,373 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:16,375 : INFO : built Dictionary(51 unique tokens: ['projects', 'thankx', 'excluding', 'fare', 'manager']...) from 2 documents (total 64 corpus positions)\n", + "2018-09-11 22:26:16,406 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:16,408 : INFO : built Dictionary(47 unique tokens: ['info', 'work', 'fare', 'shall', 'participated']...) from 2 documents (total 58 corpus positions)\n", + "2018-09-11 22:26:16,431 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:16,432 : INFO : built Dictionary(61 unique tokens: ['projects', 'usd', 'fare', 'benefits', 'style']...) from 2 documents (total 79 corpus positions)\n", + "2018-09-11 22:26:16,474 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:18,175 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:18,177 : INFO : built Dictionary(16 unique tokens: ['get', 'much', 'enough', 'per', 'pani']...) from 2 documents (total 22 corpus positions)\n", + "2018-09-11 22:26:18,180 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:18,181 : INFO : built Dictionary(25 unique tokens: ['get', 'puri', 'telling', 'map', 'even']...) 
from 2 documents (total 31 corpus positions)\n", + "2018-09-11 22:26:18,186 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:18,187 : INFO : built Dictionary(16 unique tokens: ['shop', 'get', 'puri', 'afican', 'pani']...) from 2 documents (total 20 corpus positions)\n", + "2018-09-11 22:26:18,190 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:18,191 : INFO : built Dictionary(10 unique tokens: ['get', 'puri', 'doha', 'chinese', 'pani']...) from 2 documents (total 11 corpus positions)\n", + "2018-09-11 22:26:18,193 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:18,194 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:18,195 : INFO : built Dictionary(20 unique tokens: ['get', 'appreciate', 'thanks', 'find', 'know']...) from 2 documents (total 29 corpus positions)\n", + "2018-09-11 22:26:18,199 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:18,200 : INFO : built Dictionary(18 unique tokens: ['dates', 'get', 'puri', 'discriminate', 'pani']...) from 2 documents (total 21 corpus positions)\n", + "2018-09-11 22:26:18,203 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:18,204 : INFO : built Dictionary(15 unique tokens: ['south', 'get', 'indian', 'pani', 'puri']...) from 2 documents (total 21 corpus positions)\n", + "2018-09-11 22:26:18,207 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:18,208 : INFO : built Dictionary(34 unique tokens: ['info', 'shop', 'qatar', 'puri', 'bodybuilding']...) from 2 documents (total 45 corpus positions)\n", + "2018-09-11 22:26:18,215 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:18,216 : INFO : built Dictionary(51 unique tokens: ['get', 'answers', 'hiya', 'please', 'food']...) from 2 documents (total 58 corpus positions)\n", + "2018-09-11 22:26:18,227 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:18,228 : INFO : built Dictionary(42 unique tokens: ['dried', 'get', 'baby', 'puri', 'suitcase']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:26:18,237 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:18,449 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:18,450 : INFO : built Dictionary(20 unique tokens: ['un', 'predictions', 'next', 'url_token', 'climate']...) from 2 documents (total 29 corpus positions)\n", + "2018-09-11 22:26:18,456 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:18,457 : INFO : built Dictionary(23 unique tokens: ['p', 'whatever', 'also', 'protesting', 'dead']...) from 2 documents (total 31 corpus positions)\n", + "2018-09-11 22:26:18,463 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:18,464 : INFO : built Dictionary(48 unique tokens: ['busy', 'expat', 'day', 'keep', 'besides']...) from 2 documents (total 56 corpus positions)\n", + "2018-09-11 22:26:18,480 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:18,481 : INFO : built Dictionary(27 unique tokens: ['ask', 'also', 'tell', 'occured', 'caught']...) 
from 2 documents (total 31 corpus positions)\n", + "2018-09-11 22:26:18,489 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:18,490 : INFO : built Dictionary(22 unique tokens: ['tell', 'hello', 'also', 'please', 'wants']...) from 2 documents (total 27 corpus positions)\n", + "2018-09-11 22:26:18,496 : INFO : Removed 32 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:18,497 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:18,498 : INFO : built Dictionary(29 unique tokens: ['group', 'conference', 'main', 'postings', 'also']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:26:18,506 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:18,507 : INFO : built Dictionary(24 unique tokens: ['park', 'moving', 'month', 'also', 'son']...) from 2 documents (total 31 corpus positions)\n", + "2018-09-11 22:26:18,514 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:18,515 : INFO : built Dictionary(23 unique tokens: ['qatar', 'offers', 'salaries', 'also', 'one']...) from 2 documents (total 28 corpus positions)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:26:18,521 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:18,522 : INFO : built Dictionary(38 unique tokens: ['dies', 'dozed', 'anybody', 'afternoon', 'bus']...) from 2 documents (total 52 corpus positions)\n", + "2018-09-11 22:26:18,534 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:18,535 : INFO : built Dictionary(31 unique tokens: ['living', 'woman', 'thanks', 'conference', 'lives']...) from 2 documents (total 37 corpus positions)\n", + "2018-09-11 22:26:18,544 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:20,198 : INFO : Removed 1 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:20,199 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:20,201 : INFO : built Dictionary(41 unique tokens: ['fourth', 'wife', 'website', 'review', 'means']...) from 2 documents (total 56 corpus positions)\n", + "2018-09-11 22:26:20,219 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:20,220 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:20,221 : INFO : built Dictionary(46 unique tokens: ['know', 'form', 'rp', 'please', 'today']...) from 2 documents (total 60 corpus positions)\n", + "2018-09-11 22:26:20,243 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:20,244 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:20,245 : INFO : built Dictionary(30 unique tokens: ['ask', 'receipt', 'review', 'thanks', 'rp']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:26:20,255 : INFO : Removed 1 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:20,255 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:20,256 : INFO : built Dictionary(52 unique tokens: ['receipt', 'civil', 'rp', 'please', 'today']...) 
from 2 documents (total 79 corpus positions)\n", + "2018-09-11 22:26:20,286 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:20,287 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:20,288 : INFO : built Dictionary(31 unique tokens: ['receipt', 'review', 'schedule', 'still', 'meet']...) from 2 documents (total 43 corpus positions)\n", + "2018-09-11 22:26:20,299 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:20,300 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:20,300 : INFO : built Dictionary(45 unique tokens: ['website', 'rp', 'please', 'apply', 'friends']...) from 2 documents (total 60 corpus positions)\n", + "2018-09-11 22:26:20,322 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:20,323 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:20,324 : INFO : built Dictionary(44 unique tokens: ['receipt', 'civil', 'rp', 'son', 'please']...) from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:26:20,343 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:20,344 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:20,345 : INFO : built Dictionary(45 unique tokens: ['receipt', 'rp', 'message', 'day', 'friends']...) from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:26:20,365 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:20,366 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:20,367 : INFO : built Dictionary(26 unique tokens: ['receipt', 'renewal', 'review', 'thanks', 'rp']...) from 2 documents (total 34 corpus positions)\n", + "2018-09-11 22:26:20,374 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:20,375 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:20,376 : INFO : built Dictionary(41 unique tokens: ['ask', 'wife', 'review', 'civil', 'profession']...) from 2 documents (total 63 corpus positions)\n", + "2018-09-11 22:26:20,393 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:20,496 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:20,497 : INFO : built Dictionary(16 unique tokens: ['qatar', 'provided', 'transport', 'low', 'plus']...) from 2 documents (total 42 corpus positions)\n", + "2018-09-11 22:26:20,506 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:20,507 : INFO : built Dictionary(30 unique tokens: ['nationals', 'qatar', 'office', 'transport', 'could']...) from 2 documents (total 42 corpus positions)\n", + "2018-09-11 22:26:20,518 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:20,537 : INFO : built Dictionary(44 unique tokens: ['allowance', 'qar', 'please', 'accommodation', 'information']...) from 2 documents (total 63 corpus positions)\n", + "2018-09-11 22:26:20,568 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:20,570 : INFO : built Dictionary(39 unique tokens: ['transport', 'qatar', 'people', 'appalling', 'seem']...) 
from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:26:20,593 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:20,595 : INFO : built Dictionary(35 unique tokens: ['qatar', 'transport', 'thanks', 'benefits', 'qar']...) from 2 documents (total 53 corpus positions)\n", + "2018-09-11 22:26:20,618 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:20,626 : INFO : built Dictionary(29 unique tokens: ['qatar', 'transport', 'junior', 'salaries', 'qar']...) from 2 documents (total 42 corpus positions)\n", + "2018-09-11 22:26:20,640 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:20,642 : INFO : built Dictionary(34 unique tokens: ['info', 'work', 'qatar', 'salary', 'transport']...) from 2 documents (total 43 corpus positions)\n", + "2018-09-11 22:26:20,660 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:20,662 : INFO : built Dictionary(22 unique tokens: ['qatar', 'transport', 'right', 'plus', 'qar']...) from 2 documents (total 28 corpus positions)\n", + "2018-09-11 22:26:20,673 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:20,674 : INFO : built Dictionary(45 unique tokens: ['transport', 'deposit', 'ps', 'shift', 'qar']...) from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:26:20,707 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:20,709 : INFO : built Dictionary(49 unique tokens: ['transport', 'need', 'qar', 'get', 'accommodation']...) from 2 documents (total 65 corpus positions)\n", + "2018-09-11 22:26:20,738 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:22,402 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:22,404 : INFO : built Dictionary(21 unique tokens: ['enter', 'extend', 'visa', 'tell', 'futher']...) from 2 documents (total 27 corpus positions)\n", + "2018-09-11 22:26:22,412 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:22,415 : INFO : built Dictionary(32 unique tokens: ['info', 'days', 'month', 'thanks', 'exam']...) from 2 documents (total 39 corpus positions)\n", + "2018-09-11 22:26:22,428 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:22,430 : INFO : built Dictionary(33 unique tokens: ['wife', 'qatar', 'civil', 'even', 'month']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:26:22,438 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:22,439 : INFO : built Dictionary(16 unique tokens: ['driving', 'take', 'visitor', 'medical', 'soon']...) from 2 documents (total 27 corpus positions)\n", + "2018-09-11 22:26:22,443 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:22,444 : INFO : built Dictionary(18 unique tokens: ['enter', 'renewal', 'required', 'month', 'visitor']...) from 2 documents (total 22 corpus positions)\n", + "2018-09-11 22:26:22,448 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:22,449 : INFO : built Dictionary(44 unique tokens: ['info', 'read', 'soon', 'rp', 'card']...) 
from 2 documents (total 60 corpus positions)\n",
    "2018-09-11 22:26:47,482 : INFO : built Dictionary(21 unique tokens: ['got', 'application', 'schedule', 'meet', 'month']...) 
from 2 documents (total 33 corpus positions)\n", + "2018-09-11 22:26:47,488 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:47,489 : INFO : built Dictionary(30 unique tokens: ['ask', 'wife', 'civil', 'profession', 'qar']...) from 2 documents (total 53 corpus positions)\n", + "2018-09-11 22:26:47,498 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:47,498 : INFO : built Dictionary(17 unique tokens: ['application', 'salary', 'found', 'qar', 'mean']...) from 2 documents (total 22 corpus positions)\n", + "2018-09-11 22:26:47,502 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:47,503 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:47,504 : INFO : built Dictionary(35 unique tokens: ['fourth', 'wife', 'website', 'review', 'means']...) from 2 documents (total 46 corpus positions)\n", + "2018-09-11 22:26:47,514 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:47,515 : INFO : built Dictionary(33 unique tokens: ['body', 'administration', 'changing', 'manager', 'qar']...) from 2 documents (total 46 corpus positions)\n", + "2018-09-11 22:26:47,525 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:47,526 : INFO : built Dictionary(42 unique tokens: ['review', 'keeper', 'showing', 'time', 'situation']...) from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:26:47,538 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:48,667 : INFO : Removed 1 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:48,668 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:48,669 : INFO : built Dictionary(23 unique tokens: ['qatar', 'blackcat', 'answers', 'comments', 'evening']...) from 2 documents (total 28 corpus positions)\n", + "2018-09-11 22:26:48,674 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:48,674 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:48,675 : INFO : built Dictionary(15 unique tokens: ['living', 'works', 'see', 'pic', 'know']...) from 2 documents (total 24 corpus positions)\n", + "2018-09-11 22:26:48,678 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:48,679 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:48,680 : INFO : built Dictionary(9 unique tokens: ['guevara', 'cornich', 'che', 'pic', 'see']...) from 2 documents (total 11 corpus positions)\n", + "2018-09-11 22:26:48,681 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:48,682 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:48,683 : INFO : built Dictionary(23 unique tokens: ['get', 'internship', 'study', 'p', 'qatari']...) from 2 documents (total 27 corpus positions)\n", + "2018-09-11 22:26:48,688 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:48,688 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:48,689 : INFO : built Dictionary(23 unique tokens: ['ask', 'get', 'people', 'see', 'fellow']...) 
from 2 documents (total 26 corpus positions)\n", + "2018-09-11 22:26:48,694 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:48,694 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:48,695 : INFO : built Dictionary(35 unique tokens: ['ask', 'qatar', 'worried', 'jazeera', 'thanks']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:26:48,703 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:48,704 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:48,704 : INFO : built Dictionary(12 unique tokens: ['become', 'see', 'pic', 'evening', 'afternoon']...) from 2 documents (total 15 corpus positions)\n", + "2018-09-11 22:26:48,707 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:48,707 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:48,708 : INFO : built Dictionary(17 unique tokens: ['get', 'see', 'pic', 'vet', 'location']...) from 2 documents (total 22 corpus positions)\n", + "2018-09-11 22:26:48,711 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:48,712 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:48,713 : INFO : built Dictionary(11 unique tokens: ['guevara', 'get', 'car', 'board', 'funny']...) from 2 documents (total 14 corpus positions)\n", + "2018-09-11 22:26:48,715 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:48,716 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:48,717 : INFO : built Dictionary(38 unique tokens: ['serving', 'get', 'see', 'garveys', 'october']...) from 2 documents (total 42 corpus positions)\n", + "2018-09-11 22:26:48,725 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:49,196 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:49,197 : INFO : built Dictionary(27 unique tokens: ['get', 'due', 'please', 'heard', 'cannot']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:26:49,207 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:49,208 : INFO : built Dictionary(33 unique tokens: ['get', 'air', 'please', 'anything', 'salaam']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:26:49,219 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:49,220 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:49,221 : INFO : built Dictionary(22 unique tokens: ['get', 'thanks', 'advise', 'please', 'months']...) from 2 documents (total 34 corpus positions)\n", + "2018-09-11 22:26:49,227 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:49,228 : INFO : built Dictionary(36 unique tokens: ['wife', 'ant', 'get', 'may', 'thanks']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:26:49,241 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:49,242 : INFO : built Dictionary(33 unique tokens: ['rejected', 'get', 'limit', 'max', 'soon']...) 
from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:26:49,254 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:49,255 : INFO : built Dictionary(24 unique tokens: ['fact', 'get', 'people', 'limit', 'arrival']...) from 2 documents (total 35 corpus positions)\n", + "2018-09-11 22:26:49,261 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:49,262 : INFO : built Dictionary(32 unique tokens: ['wife', 'reply', 'limit', 'also', 'get']...) from 2 documents (total 43 corpus positions)\n", + "2018-09-11 22:26:49,273 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:49,274 : INFO : built Dictionary(27 unique tokens: ['work', 'get', 'thanks', 'please', 'aquiring']...) from 2 documents (total 39 corpus positions)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:26:49,282 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:49,283 : INFO : built Dictionary(52 unique tokens: ['ask', 'nurse', 'please', 'good', 'may']...) from 2 documents (total 64 corpus positions)\n", + "2018-09-11 22:26:49,304 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:49,305 : INFO : built Dictionary(48 unique tokens: ['formalities', 'rules', 'card', 'appreciated', 'required']...) from 2 documents (total 62 corpus positions)\n", + "2018-09-11 22:26:49,325 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:50,417 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:50,418 : INFO : built Dictionary(32 unique tokens: ['ask', 'book', 'get', 'said', 'elsewhere']...) from 2 documents (total 86 corpus positions)\n", + "2018-09-11 22:26:50,448 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:50,449 : INFO : built Dictionary(54 unique tokens: ['ask', 'book', 'ticket', 'said', 'free']...) from 2 documents (total 74 corpus positions)\n", + "2018-09-11 22:26:50,479 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:50,481 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:50,481 : INFO : built Dictionary(64 unique tokens: ['cancelled', 'ask', 'release', 'said', 'manager']...) from 2 documents (total 82 corpus positions)\n", + "2018-09-11 22:26:50,522 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:50,523 : INFO : built Dictionary(53 unique tokens: ['ask', 'work', 'said', 'qatar', 'return']...) from 2 documents (total 72 corpus positions)\n", + "2018-09-11 22:26:50,552 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:50,553 : INFO : built Dictionary(51 unique tokens: ['cancelled', 'ask', 'said', 'work', 'please']...) from 2 documents (total 69 corpus positions)\n", + "2018-09-11 22:26:50,576 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:50,577 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:50,578 : INFO : built Dictionary(63 unique tokens: ['ask', 'work', 'said', 'december', 'indians']...) from 2 documents (total 87 corpus positions)\n", + "2018-09-11 22:26:50,617 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:50,618 : INFO : built Dictionary(53 unique tokens: ['ask', 'book', 'ticket', 'changing', 'manager']...) 
from 2 documents (total 77 corpus positions)\n", + "2018-09-11 22:26:50,645 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:50,647 : INFO : built Dictionary(53 unique tokens: ['ask', 'permit', 'need', 'visit', 'soon']...) from 2 documents (total 72 corpus positions)\n", + "2018-09-11 22:26:50,672 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:50,673 : INFO : built Dictionary(40 unique tokens: ['ask', 'book', 'get', 'said', 'elsewhere']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:26:50,684 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:50,685 : INFO : built Dictionary(59 unique tokens: ['resign', 'wife', 'allow', 'ticket', 'happen']...) from 2 documents (total 84 corpus positions)\n", + "2018-09-11 22:26:50,720 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:50,993 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:50,995 : INFO : built Dictionary(53 unique tokens: ['best', 'healthy', 'getting', 'watchers', 'please']...) from 2 documents (total 65 corpus positions)\n", + "2018-09-11 22:26:51,019 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:51,020 : INFO : built Dictionary(32 unique tokens: ['getting', 'seem', 'slimming', 'deliver', 'prepare']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:26:51,031 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:51,032 : INFO : built Dictionary(29 unique tokens: ['getting', 'slimming', 'seem', 'rates', 'watchers']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:26:51,041 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:51,042 : INFO : built Dictionary(45 unique tokens: ['info', 'tell', 'watchers', 'overweight', 'months']...) from 2 documents (total 56 corpus positions)\n", + "2018-09-11 22:26:51,061 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:51,062 : INFO : built Dictionary(33 unique tokens: ['would', 'getting', 'qatar', 'group', 'slimming']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:26:51,073 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:51,074 : INFO : built Dictionary(59 unique tokens: ['ask', 'getting', 'constantly', 'watchers', 'day']...) from 2 documents (total 72 corpus positions)\n", + "2018-09-11 22:26:51,105 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:51,106 : INFO : built Dictionary(52 unique tokens: ['supplier', 'know', 'tell', 'watchers', 'get']...) from 2 documents (total 69 corpus positions)\n", + "2018-09-11 22:26:51,130 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:51,131 : INFO : built Dictionary(21 unique tokens: ['getting', 'world', 'hypnosis', 'seem', 'useful']...) from 2 documents (total 27 corpus positions)\n", + "2018-09-11 22:26:51,134 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:51,135 : INFO : built Dictionary(51 unique tokens: ['someone', 'know', 'willing', 'watchers', 'living']...) 
from 2 documents (total 62 corpus positions)\n", + "2018-09-11 22:26:51,158 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:51,159 : INFO : built Dictionary(43 unique tokens: ['hi', 'getting', 'qatar', 'need', 'thanks']...) from 2 documents (total 58 corpus positions)\n", + "2018-09-11 22:26:51,177 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:52,421 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:52,422 : INFO : built Dictionary(12 unique tokens: ['living', 'everyone', 'thanks', 'hows', 'qatar']...) from 2 documents (total 32 corpus positions)\n", + "2018-09-11 22:26:52,428 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:52,429 : INFO : built Dictionary(46 unique tokens: ['hows', 'happen', 'professor', 'recommended', 'people']...) from 2 documents (total 53 corpus positions)\n", + "2018-09-11 22:26:52,445 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:52,446 : INFO : built Dictionary(18 unique tokens: ['qatar', 'everyone', 'tell', 'hows', 'thanks']...) from 2 documents (total 24 corpus positions)\n", + "2018-09-11 22:26:52,450 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:52,451 : INFO : built Dictionary(46 unique tokens: ['hows', 'qar', 'living', 'ur', 'average']...) from 2 documents (total 62 corpus positions)\n", + "2018-09-11 22:26:52,468 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:52,469 : INFO : built Dictionary(31 unique tokens: ['ask', 'friend', 'get', 'pls', 'tell']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:26:52,479 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:52,480 : INFO : built Dictionary(31 unique tokens: ['qatar', 'get', 'c', 'tell', 'hows']...) from 2 documents (total 45 corpus positions)\n", + "2018-09-11 22:26:52,490 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:52,491 : INFO : built Dictionary(46 unique tokens: ['work', 'tell', 'clean', 'flaming', 'living']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:26:52,508 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:52,509 : INFO : built Dictionary(34 unique tokens: ['cancelled', 'body', 'council', 'tell', 'hows']...) from 2 documents (total 42 corpus positions)\n", + "2018-09-11 22:26:52,520 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:52,521 : INFO : built Dictionary(39 unique tokens: ['south', 'get', 'follow', 'hows', 'whether']...) from 2 documents (total 48 corpus positions)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:26:52,534 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:52,535 : INFO : built Dictionary(32 unique tokens: ['info', 'living', 'website', 'thanks', 'aussie']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:26:52,546 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:52,843 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:52,844 : INFO : built Dictionary(22 unique tokens: ['body', 'associates', 'p', 'qatar', 'somebody']...) 
from 2 documents (total 32 corpus positions)\n", + "2018-09-11 22:26:52,849 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:52,850 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:52,851 : INFO : built Dictionary(31 unique tokens: ['plz', 'qatar', 'days', 'associates', 'p']...) from 2 documents (total 39 corpus positions)\n", + "2018-09-11 22:26:52,861 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:52,862 : INFO : built Dictionary(27 unique tokens: ['qatar', 'associates', 'agree', 'p', 'benefits']...) from 2 documents (total 32 corpus positions)\n", + "2018-09-11 22:26:52,869 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:52,870 : INFO : built Dictionary(24 unique tokens: ['section', 'qatar', 'associates', 'p', 'circumsized']...) from 2 documents (total 28 corpus positions)\n", + "2018-09-11 22:26:52,876 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:52,877 : INFO : built Dictionary(29 unique tokens: ['wife', 'qatar', 'associates', 'woman', 'p']...) from 2 documents (total 33 corpus positions)\n", + "2018-09-11 22:26:52,885 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:52,886 : INFO : built Dictionary(21 unique tokens: ['visa', 'qatar', 'country', 'associates', 'artist']...) from 2 documents (total 25 corpus positions)\n", + "2018-09-11 22:26:52,890 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:52,891 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:52,892 : INFO : built Dictionary(46 unique tokens: ['associates', 'n', 'decided', 'tattoo', 'hehe']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:26:52,909 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:52,910 : INFO : built Dictionary(28 unique tokens: ['qatar', 'earnest', 'thanks', 'atheists', 'qlers']...) from 2 documents (total 33 corpus positions)\n", + "2018-09-11 22:26:52,918 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:52,918 : INFO : built Dictionary(17 unique tokens: ['visa', 'qatar', 'country', 'associates', 'p']...) from 2 documents (total 23 corpus positions)\n", + "2018-09-11 22:26:52,921 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:52,922 : INFO : built Dictionary(26 unique tokens: ['qatar', 'associates', 'guys', 'yr', 'good']...) from 2 documents (total 31 corpus positions)\n", + "2018-09-11 22:26:52,928 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:54,239 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:54,241 : INFO : built Dictionary(21 unique tokens: ['everywhere', 'drive', 'respect', 'speed', 'contest']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:26:54,254 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:54,255 : INFO : built Dictionary(27 unique tokens: ['drive', 'thousand', 'money', 'km', 'car']...) 
from 2 documents (total 33 corpus positions)\n", + "2018-09-11 22:26:54,263 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:54,264 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:54,265 : INFO : built Dictionary(42 unique tokens: ['drive', 'people', 'everywhere', 'tattoo', 'please']...) from 2 documents (total 52 corpus positions)\n", + "2018-09-11 22:26:54,282 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:54,283 : INFO : built Dictionary(59 unique tokens: ['never', 'open', 'ok', 'ta', 'asking']...) from 2 documents (total 76 corpus positions)\n", + "2018-09-11 22:26:54,316 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:54,317 : INFO : built Dictionary(47 unique tokens: ['drive', 'provide', 'release', 'liabilities', 'card']...) from 2 documents (total 60 corpus positions)\n", + "2018-09-11 22:26:54,338 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:54,340 : INFO : built Dictionary(49 unique tokens: ['ask', 'drive', 'please', 'food', 'good']...) from 2 documents (total 57 corpus positions)\n", + "2018-09-11 22:26:54,363 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:54,364 : INFO : built Dictionary(47 unique tokens: ['drive', 'countries', 'please', 'unfair', 'limit']...) from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:26:54,385 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:54,386 : INFO : built Dictionary(35 unique tokens: ['wife', 'want', 'drive', 'possible', 'driven']...) from 2 documents (total 45 corpus positions)\n", + "2018-09-11 22:26:54,397 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:54,398 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:54,399 : INFO : built Dictionary(53 unique tokens: ['mom', 'drive', 'admin', 'never', 'work']...) from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:26:54,425 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:54,426 : INFO : built Dictionary(49 unique tokens: ['worry', 'drive', 'alot', 'day', 'good']...) from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:26:54,450 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:54,593 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:54,595 : INFO : built Dictionary(24 unique tokens: ['qatar', 'treatment', 'could', 'also', 'recommend']...) from 2 documents (total 34 corpus positions)\n", + "2018-09-11 22:26:54,601 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:54,602 : INFO : built Dictionary(37 unique tokens: ['getting', 'tested', 'basically', 'c', 'recently']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:26:54,614 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:54,615 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:54,615 : INFO : built Dictionary(35 unique tokens: ['family', 'work', 'body', 'tests', 'civil']...) 
from 2 documents (total 38 corpus positions)\n", + "2018-09-11 22:26:54,626 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:54,627 : INFO : built Dictionary(36 unique tokens: ['info', 'qatar', 'days', 'recently', 'thanks']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:26:54,637 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:54,638 : INFO : built Dictionary(29 unique tokens: ['qatar', 'recently', 'could', 'pre', 'one']...) from 2 documents (total 34 corpus positions)\n", + "2018-09-11 22:26:54,646 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:54,647 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:54,648 : INFO : built Dictionary(35 unique tokens: ['wife', 'lungs', 'problem', 'visit', 'work']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:26:54,659 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:54,660 : INFO : built Dictionary(31 unique tokens: ['work', 'qatar', 'pls', 'located', 'could']...) from 2 documents (total 35 corpus positions)\n", + "2018-09-11 22:26:54,669 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:54,670 : INFO : built Dictionary(20 unique tokens: ['getting', 'qatar', 'ok', 'enough', 'hamad']...) from 2 documents (total 28 corpus positions)\n", + "2018-09-11 22:26:54,675 : INFO : Removed 2 and 0 OOV words from document 1 and 2 (respectively).\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:26:54,676 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:54,677 : INFO : built Dictionary(37 unique tokens: ['hospitals', 'qatar', 'also', 'insurance', 'thanks']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:26:54,688 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:54,689 : INFO : built Dictionary(39 unique tokens: ['anxious', 'qatar', 'pls', 'nurse', 'per']...) from 2 documents (total 45 corpus positions)\n", + "2018-09-11 22:26:54,701 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:56,147 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:56,149 : INFO : built Dictionary(34 unique tokens: ['perm', 'matt', 'moving', 'wife', 'thanks']...) from 2 documents (total 72 corpus positions)\n", + "2018-09-11 22:26:56,182 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:56,183 : INFO : built Dictionary(47 unique tokens: ['perm', 'work', 'ship', 'c', 'november']...) from 2 documents (total 57 corpus positions)\n", + "2018-09-11 22:26:56,204 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:56,205 : INFO : built Dictionary(48 unique tokens: ['perm', 'november', 'pack', 'matt', 'oil']...) from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:26:56,227 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:56,228 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:56,229 : INFO : built Dictionary(59 unique tokens: ['info', 'november', 'goods', 'work', 'oil']...) 
from 2 documents (total 69 corpus positions)\n", + "2018-09-11 22:26:56,262 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:56,264 : INFO : built Dictionary(63 unique tokens: ['perm', 'november', 'month', 'work', 'oil']...) from 2 documents (total 79 corpus positions)\n", + "2018-09-11 22:26:56,302 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:56,303 : INFO : built Dictionary(68 unique tokens: ['perm', 'november', 'matt', 'telling', 'terrier']...) from 2 documents (total 82 corpus positions)\n", + "2018-09-11 22:26:56,347 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:56,348 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:56,349 : INFO : built Dictionary(62 unique tokens: ['perm', 'november', 'view', 'ok', 'work']...) from 2 documents (total 73 corpus positions)\n", + "2018-09-11 22:26:56,382 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:56,383 : INFO : built Dictionary(38 unique tokens: ['reply', 'work', 'qatar', 'know', 'thanks']...) from 2 documents (total 54 corpus positions)\n", + "2018-09-11 22:26:56,388 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:56,389 : INFO : built Dictionary(48 unique tokens: ['perm', 'november', 'work', 'oil', 'day']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:26:56,396 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:56,397 : INFO : built Dictionary(16 unique tokens: ['reply', 'get', 'female', 'thanks', 'indian']...) from 2 documents (total 26 corpus positions)\n", + "2018-09-11 22:26:56,400 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:56,401 : INFO : built Dictionary(42 unique tokens: ['reply', 'affidavit', 'get', 'indian', 'granted']...) from 2 documents (total 62 corpus positions)\n", + "2018-09-11 22:26:56,408 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:56,409 : INFO : built Dictionary(65 unique tokens: ['perm', 'work', 'view', 'tell', 'providers']...) from 2 documents (total 74 corpus positions)\n", + "2018-09-11 22:26:56,416 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:56,417 : INFO : built Dictionary(25 unique tokens: ['reply', 'get', 'would', 'maximum', 'thanks']...) from 2 documents (total 34 corpus positions)\n", + "2018-09-11 22:26:56,424 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:56,425 : INFO : built Dictionary(45 unique tokens: ['would', 'speaking', 'maybe', 'get', 'appreciated']...) from 2 documents (total 52 corpus positions)\n", + "2018-09-11 22:26:56,440 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:56,441 : INFO : built Dictionary(40 unique tokens: ['living', 'indian', 'awaiting', 'thanks', 'visit']...) from 2 documents (total 56 corpus positions)\n", + "2018-09-11 22:26:56,454 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:56,455 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:56,455 : INFO : built Dictionary(13 unique tokens: ['reply', 'visa', 'thanks', 'indian', 'know']...) from 2 documents (total 20 corpus positions)\n", + "2018-09-11 22:26:56,456 : INFO : built Dictionary(56 unique tokens: ['perm', 'november', 'month', 'matt', 'oil']...) 
from 2 documents (total 64 corpus positions)\n", + "2018-09-11 22:26:56,457 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:56,458 : INFO : built Dictionary(23 unique tokens: ['getting', 'get', 'would', 'ok', 'thanks']...) from 2 documents (total 33 corpus positions)\n", + "2018-09-11 22:26:56,464 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:56,465 : INFO : built Dictionary(34 unique tokens: ['friend', 'said', 'get', 'know', 'thanks']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:26:56,475 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:56,476 : INFO : built Dictionary(37 unique tokens: ['ahli', 'fluent', 'speaks', 'know', 'doctor']...) from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:26:56,487 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:56,488 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:58,153 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:58,155 : INFO : built Dictionary(29 unique tokens: ['mandoob', 'body', 'administration', 'changing', 'manager']...) from 2 documents (total 43 corpus positions)\n", + "2018-09-11 22:26:58,161 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:58,162 : INFO : built Dictionary(30 unique tokens: ['ask', 'problem', 'result', 'get', 'house']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:26:58,168 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:58,169 : INFO : built Dictionary(43 unique tokens: ['ask', 'mandoob', 'get', 'website', 'mother']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:26:58,178 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:58,179 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:58,180 : INFO : built Dictionary(42 unique tokens: ['accepted', 'mandoob', 'get', 'diploma', 'professions']...) from 2 documents (total 66 corpus positions)\n", + "2018-09-11 22:26:58,182 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:58,183 : INFO : built Dictionary(9 unique tokens: ['furniture', 'stores', 'thank', 'best', 'buy']...) from 2 documents (total 34 corpus positions)\n", + "2018-09-11 22:26:58,187 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:58,188 : INFO : built Dictionary(20 unique tokens: ['shop', 'considering', 'showrooms', 'next', 'advice']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:26:58,189 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:58,190 : INFO : built Dictionary(31 unique tokens: ['info', 'mandoob', 'get', 'days', 'thanks']...) from 2 documents (total 39 corpus positions)\n", + "2018-09-11 22:26:58,194 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:58,195 : INFO : built Dictionary(19 unique tokens: ['would', 'stores', 'dresser', 'used', 'thank']...) 
from 2 documents (total 34 corpus positions)\n", + "2018-09-11 22:26:58,196 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:58,197 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:26:58,198 : INFO : built Dictionary(27 unique tokens: ['mandoob', 'get', 'baldiya', 'visit', 'even']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:26:58,200 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:58,201 : INFO : built Dictionary(14 unique tokens: ['reasonable', 'stores', 'center', 'thank', 'mall']...) from 2 documents (total 28 corpus positions)\n", + "2018-09-11 22:26:58,204 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:58,205 : INFO : built Dictionary(25 unique tokens: ['get', 'thanks', 'mu', 'daughter', 'also']...) from 2 documents (total 39 corpus positions)\n", + "2018-09-11 22:26:58,205 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:58,206 : INFO : built Dictionary(19 unique tokens: ['best', 'showrooms', 'stores', 'lot', 'thanks']...) from 2 documents (total 31 corpus positions)\n", + "2018-09-11 22:26:58,210 : INFO : Removed 11 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:58,210 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:58,211 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:58,211 : INFO : built Dictionary(41 unique tokens: ['living', 'showrooms', 'bulky', 'full', 'else']...) from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:26:58,212 : INFO : built Dictionary(40 unique tokens: ['status', 'work', 'get', 'visit', 'n']...) from 2 documents (total 68 corpus positions)\n", + "2018-09-11 22:26:58,220 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:58,221 : INFO : built Dictionary(34 unique tokens: ['get', 'lot', 'weighing', 'thanks', 'married']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:26:58,225 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:58,226 : INFO : built Dictionary(17 unique tokens: ['get', 'showrooms', 'stores', 'could', 'nest']...) from 2 documents (total 29 corpus positions)\n", + "2018-09-11 22:26:58,228 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:58,229 : INFO : built Dictionary(30 unique tokens: ['mandoob', 'get', 'people', 'benefits', 'one']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:26:58,230 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:58,231 : INFO : built Dictionary(28 unique tokens: ['goods', 'thanks', 'thank', 'drink', 'available']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:26:58,235 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:58,239 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:58,240 : INFO : built Dictionary(53 unique tokens: ['shop', 'would', 'qatar', 'window', 'k']...) from 2 documents (total 64 corpus positions)\n", + "2018-09-11 22:26:58,257 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:58,258 : INFO : built Dictionary(14 unique tokens: ['mattress', 'shop', 'showrooms', 'stores', 'know']...) 
from 2 documents (total 25 corpus positions)\n", + "2018-09-11 22:26:58,261 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:26:59,908 : INFO : Removed 2 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:59,910 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:59,910 : INFO : built Dictionary(46 unique tokens: ['pls', 'kaso', 'workin', 'son', 'feedback']...) from 2 documents (total 72 corpus positions)\n", + "2018-09-11 22:26:59,926 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:59,927 : INFO : built Dictionary(23 unique tokens: ['tell', 'month', 'son', 'please', 'period']...) from 2 documents (total 33 corpus positions)\n", + "2018-09-11 22:26:59,932 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:59,933 : INFO : built Dictionary(34 unique tokens: ['work', 'problem', 'pay', 'son', 'qatar']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:26:59,942 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:59,943 : INFO : built Dictionary(41 unique tokens: ['info', 'qatar', 'steps', 'limit', 'thanks']...) from 2 documents (total 57 corpus positions)\n", + "2018-09-11 22:26:59,955 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:59,956 : INFO : built Dictionary(24 unique tokens: ['education', 'get', 'could', 'complete', 'son']...) from 2 documents (total 32 corpus positions)\n", + "2018-09-11 22:26:59,959 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:59,960 : INFO : built Dictionary(19 unique tokens: ['get', 'look', 'apartments', 'ok', 'thanks']...) from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:26:59,962 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:59,963 : INFO : built Dictionary(41 unique tokens: ['living', 'awaiting', 'thanks', 'residency', 'old']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:26:59,972 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:59,973 : INFO : built Dictionary(41 unique tokens: ['goods', 'household', 'apartments', 'ok', 'thanks']...) from 2 documents (total 54 corpus positions)\n", + "2018-09-11 22:26:59,974 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:59,975 : INFO : built Dictionary(28 unique tokens: ['limit', 'wage', 'per', 'government', 'month']...) from 2 documents (total 38 corpus positions)\n", + "2018-09-11 22:26:59,982 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:59,983 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:59,983 : INFO : built Dictionary(23 unique tokens: ['qatar', 'hello', 'currently', 'son', 'available']...) from 2 documents (total 32 corpus positions)\n", + "2018-09-11 22:26:59,989 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:26:59,990 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:59,990 : INFO : built Dictionary(44 unique tokens: ['feed', 'either', 'son', 'good', 'ur']...) 
from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:26:59,990 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:26:59,991 : INFO : built Dictionary(29 unique tokens: ['informed', 'qatar', 'apartments', 'ok', 'thanks']...) from 2 documents (total 37 corpus positions)\n", + "2018-09-11 22:27:00,001 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:00,002 : INFO : built Dictionary(26 unique tokens: ['reply', 'apartments', 'ok', 'thanks', 'showroom']...) from 2 documents (total 34 corpus positions)\n", + "2018-09-11 22:27:00,003 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:00,004 : INFO : built Dictionary(44 unique tokens: ['freaked', 'god', 'almost', 'maid', 'care']...) from 2 documents (total 63 corpus positions)\n", + "2018-09-11 22:27:00,009 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:00,010 : INFO : built Dictionary(39 unique tokens: ['get', 'apartments', 'need', 'thanks', 'wardrobes']...) from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:27:00,018 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:00,025 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:00,026 : INFO : built Dictionary(44 unique tokens: ['ok', 'get', 'good', 'question', 'months']...) from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:27:00,044 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:00,046 : INFO : built Dictionary(45 unique tokens: ['thankx', 'ok', 'excluding', 'manager', 'qar']...) from 2 documents (total 53 corpus positions)\n", + "2018-09-11 22:27:00,063 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:00,064 : INFO : built Dictionary(47 unique tokens: ['ask', 'ok', 'nurse', 'attend', 'clinics']...) from 2 documents (total 63 corpus positions)\n", + "2018-09-11 22:27:00,085 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:00,086 : INFO : built Dictionary(44 unique tokens: ['cuz', 'mornings', 'wb', 'question', 'stuff']...) from 2 documents (total 60 corpus positions)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:27:00,105 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:00,106 : INFO : built Dictionary(39 unique tokens: ['get', 'know', 'apartments', 'ok', 'seem']...) from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:27:00,120 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:01,673 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:01,675 : INFO : built Dictionary(35 unique tokens: ['qatar', 'suggestions', 'receive', 'nman', 'hello']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:27:01,684 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:01,685 : INFO : built Dictionary(25 unique tokens: ['charges', 'warm', 'say', 'hav', 'india']...) from 2 documents (total 39 corpus positions)\n", + "2018-09-11 22:27:01,691 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:01,692 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:01,693 : INFO : built Dictionary(44 unique tokens: ['unable', 'nri', 'better', 'indians', 'icici']...) 
from 2 documents (total 60 corpus positions)\n",
+    "[… several thousand near-identical gensim INFO log lines omitted from these stderr stream outputs: for each evaluated query pair the logger reports 'adding document #0 to Dictionary(0 unique tokens: [])', 'built Dictionary(N unique tokens: [...]) from 2 documents (total M corpus positions)', occasionally 'Removed X and Y OOV words from document 1 and 2 (respectively)', and 'precomputing L2-norms of word weight vectors' …]\n",
+    "2018-09-11 22:27:21,888 : INFO : built Dictionary(55 unique tokens: ['arab', 'said', 'drive', 'countries', 'intend']...)
from 2 documents (total 67 corpus positions)\n", + "2018-09-11 22:27:21,916 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:23,163 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:23,164 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:23,165 : INFO : built Dictionary(30 unique tokens: ['get', 'baldiya', 'stamp', 'visit', 'even']...) from 2 documents (total 45 corpus positions)\n", + "2018-09-11 22:27:23,173 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:23,174 : INFO : built Dictionary(20 unique tokens: ['attestation', 'get', 'visa', 'quick', 'procedure']...) from 2 documents (total 33 corpus positions)\n", + "2018-09-11 22:27:23,179 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:23,180 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:23,181 : INFO : built Dictionary(40 unique tokens: ['said', 'tenancy', 'qr', 'stamp', 'question']...) from 2 documents (total 54 corpus positions)\n", + "2018-09-11 22:27:23,191 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:23,192 : INFO : built Dictionary(14 unique tokens: ['stamp', 'get', 'baladiya', 'rent', 'contact']...) from 2 documents (total 21 corpus positions)\n", + "2018-09-11 22:27:23,195 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:23,196 : INFO : built Dictionary(23 unique tokens: ['get', 'could', 'baladiya', 'contact', 'rent']...) from 2 documents (total 29 corpus positions)\n", + "2018-09-11 22:27:23,201 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:23,202 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:23,203 : INFO : built Dictionary(23 unique tokens: ['get', 'designation', 'month', 'rent', 'mth']...) from 2 documents (total 28 corpus positions)\n", + "2018-09-11 22:27:23,208 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:23,209 : INFO : built Dictionary(42 unique tokens: ['ask', 'rental', 'get', 'went', 'stamp']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:27:23,219 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:23,220 : INFO : built Dictionary(21 unique tokens: ['stamp', 'got', 'application', 'schedule', 'get']...) from 2 documents (total 31 corpus positions)\n", + "2018-09-11 22:27:23,225 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:23,225 : INFO : built Dictionary(22 unique tokens: ['get', 'thanks', 'baladiya', 'submitting', 'qatar']...) from 2 documents (total 26 corpus positions)\n", + "2018-09-11 22:27:23,230 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:23,231 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:23,232 : INFO : built Dictionary(41 unique tokens: ['activities', 'get', 'diving', 'stamp', 'interested']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:27:23,242 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:23,601 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:23,602 : INFO : built Dictionary(30 unique tokens: ['work', 'get', 'people', 'god', 'willing']...) 
from 2 documents (total 66 corpus positions)\n", + "2018-09-11 22:27:23,628 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:23,629 : INFO : built Dictionary(50 unique tokens: ['work', 'god', 'willing', 'free', 'exposure']...) from 2 documents (total 64 corpus positions)\n", + "2018-09-11 22:27:23,655 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:23,656 : INFO : built Dictionary(47 unique tokens: ['work', 'god', 'urgnt', 'qatar', 'exposure']...) from 2 documents (total 57 corpus positions)\n", + "2018-09-11 22:27:23,677 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:23,678 : INFO : built Dictionary(48 unique tokens: ['work', 'god', 'willing', 'benefits', 'free']...) from 2 documents (total 65 corpus positions)\n", + "2018-09-11 22:27:23,700 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:23,701 : INFO : built Dictionary(59 unique tokens: ['ask', 'work', 'god', 'need', 'nurse']...) from 2 documents (total 71 corpus positions)\n", + "2018-09-11 22:27:23,732 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:23,734 : INFO : built Dictionary(57 unique tokens: ['work', 'actually', 'god', 'willing', 'contact']...) from 2 documents (total 72 corpus positions)\n", + "2018-09-11 22:27:23,764 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:23,765 : INFO : built Dictionary(39 unique tokens: ['getting', 'get', 'people', 'god', 'ok']...) from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:27:23,777 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:23,778 : INFO : built Dictionary(62 unique tokens: ['work', 'ql', 'starting', 'god', 'else']...) from 2 documents (total 78 corpus positions)\n", + "2018-09-11 22:27:23,815 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:23,816 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:23,816 : INFO : built Dictionary(53 unique tokens: ['work', 'qnb', 'god', 'cards', 'free']...) from 2 documents (total 69 corpus positions)\n", + "2018-09-11 22:27:23,844 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:23,845 : INFO : built Dictionary(37 unique tokens: ['work', 'get', 'people', 'god', 'willing']...) from 2 documents (total 43 corpus positions)\n", + "2018-09-11 22:27:23,855 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:24,894 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:24,896 : INFO : built Dictionary(36 unique tokens: ['info', 'plz', 'open', 'tab', 'read']...) from 2 documents (total 45 corpus positions)\n", + "2018-09-11 22:27:24,909 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:27:24,910 : INFO : built Dictionary(52 unique tokens: ['best', 'plz', 'open', 'know', 'healthy']...) from 2 documents (total 63 corpus positions)\n", + "2018-09-11 22:27:24,933 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:24,934 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:24,935 : INFO : built Dictionary(41 unique tokens: ['would', 'plz', 'get', 'suggessions', 'tab']...) 
from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:27:24,951 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:24,952 : INFO : built Dictionary(31 unique tokens: ['plz', 'qatar', 'open', 'know', 'lock']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:27:24,962 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:24,963 : INFO : built Dictionary(46 unique tokens: ['plz', 'said', 'open', 'tab', 'pad']...) from 2 documents (total 65 corpus positions)\n", + "2018-09-11 22:27:24,983 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:24,984 : INFO : built Dictionary(35 unique tokens: ['plz', 'open', 'tab', 'lock', 'aaa']...) from 2 documents (total 42 corpus positions)\n", + "2018-09-11 22:27:24,996 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:24,997 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:24,997 : INFO : built Dictionary(33 unique tokens: ['plz', 'open', 'tab', 'lock', 'coke']...) from 2 documents (total 39 corpus positions)\n", + "2018-09-11 22:27:25,008 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:25,009 : INFO : built Dictionary(43 unique tokens: ['wife', 'plz', 'qatar', 'approximate', 'lock']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:27:25,027 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:25,028 : INFO : built Dictionary(49 unique tokens: ['plz', 'open', 'tab', 'pad', 'old']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:27:25,049 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:25,050 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:25,051 : INFO : built Dictionary(36 unique tokens: ['friend', 'plz', 'open', 'tab', 'doctor']...) from 2 documents (total 43 corpus positions)\n", + "2018-09-11 22:27:25,063 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:25,539 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:25,540 : INFO : built Dictionary(32 unique tokens: ['get', 'bad', 'offers', 'could', 'hello']...) from 2 documents (total 86 corpus positions)\n", + "2018-09-11 22:27:25,570 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:25,571 : INFO : built Dictionary(42 unique tokens: ['offers', 'qatar', 'get', 'high', 'interviews']...) from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:27:25,588 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:25,589 : INFO : built Dictionary(55 unique tokens: ['provide', 'bayt', 'interviews', 'cv', 'respond']...) from 2 documents (total 78 corpus positions)\n", + "2018-09-11 22:27:25,623 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:25,624 : INFO : built Dictionary(34 unique tokens: ['offers', 'get', 'male', 'could', 'hello']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:27:25,631 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:25,632 : INFO : built Dictionary(37 unique tokens: ['qatar', 'get', 'bad', 'offers', 'could']...) 
from 2 documents (total 54 corpus positions)\n", + "2018-09-11 22:27:25,642 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:25,643 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:25,644 : INFO : built Dictionary(49 unique tokens: ['recruiting', 'totally', 'hiring', 'good', 'bbc']...) from 2 documents (total 65 corpus positions)\n", + "2018-09-11 22:27:25,666 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:25,668 : INFO : built Dictionary(55 unique tokens: ['work', 'drive', 'male', 'totally', 'disk']...) from 2 documents (total 75 corpus positions)\n", + "2018-09-11 22:27:25,697 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:25,699 : INFO : built Dictionary(48 unique tokens: ['interviews', 'totally', 'get', 'good', 'dubai']...) from 2 documents (total 60 corpus positions)\n", + "2018-09-11 22:27:25,718 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:25,719 : INFO : built Dictionary(58 unique tokens: ['labour', 'totally', 'rp', 'hiring', 'good']...) from 2 documents (total 74 corpus positions)\n", + "2018-09-11 22:27:25,750 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:25,751 : INFO : built Dictionary(43 unique tokens: ['offers', 'qatar', 'get', 'bad', 'qatarized']...) from 2 documents (total 54 corpus positions)\n", + "2018-09-11 22:27:25,764 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:26,718 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:26,720 : INFO : built Dictionary(34 unique tokens: ['group', 'thanks', 'right', 'hello', 'free']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:27:26,731 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:26,732 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:26,733 : INFO : built Dictionary(37 unique tokens: ['crowded', 'living', 'group', 'area', 'wants']...) from 2 documents (total 45 corpus positions)\n", + "2018-09-11 22:27:26,745 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:26,746 : INFO : built Dictionary(17 unique tokens: ['badminton', 'group', 'plays', 'times', 'see']...) from 2 documents (total 22 corpus positions)\n", + "2018-09-11 22:27:26,750 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:26,751 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:26,752 : INFO : built Dictionary(34 unique tokens: ['would', 'qatar', 'group', 'see', 'full']...) from 2 documents (total 42 corpus positions)\n", + "2018-09-11 22:27:26,763 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:26,764 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:26,764 : INFO : built Dictionary(39 unique tokens: ['dito', 'group', 'ba', 'see', 'sumali']...) from 2 documents (total 54 corpus positions)\n", + "2018-09-11 22:27:26,778 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:26,779 : INFO : built Dictionary(23 unique tokens: ['group', 'hello', 'anyone', 'wants', 'regularly']...) 
from 2 documents (total 30 corpus positions)\n", + "2018-09-11 22:27:26,786 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:26,787 : INFO : built Dictionary(37 unique tokens: ['crowded', 'group', 'qbc', 'thanks', 'like']...) from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:27:26,799 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:26,800 : INFO : built Dictionary(24 unique tokens: ['wife', 'swimming', 'group', 'program', 'could']...) from 2 documents (total 30 corpus positions)\n", + "2018-09-11 22:27:26,806 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:26,807 : INFO : built Dictionary(31 unique tokens: ['shop', 'qatar', 'also', 'regular', 'decent']...) from 2 documents (total 37 corpus positions)\n", + "2018-09-11 22:27:26,816 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:26,817 : INFO : built Dictionary(41 unique tokens: ['reasonable', 'swimming', 'group', 'wants', 'success']...) from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:27:26,830 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:27,442 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:27,443 : INFO : built Dictionary(39 unique tokens: ['current', 'missing', 'countries', 'ive', 'scope']...) from 2 documents (total 54 corpus positions)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:27:27,457 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:27,459 : INFO : built Dictionary(26 unique tokens: ['plz', 'shop', 'open', 'ive', 'foreign']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:27:27,467 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:27,468 : INFO : built Dictionary(28 unique tokens: ['shop', 'get', 'ive', 'ok', 'thought']...) from 2 documents (total 39 corpus positions)\n", + "2018-09-11 22:27:27,477 : INFO : Removed 2 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:27,478 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:27,479 : INFO : built Dictionary(32 unique tokens: ['alternative', 'reply', 'ive', 'healthy', 'could']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:27:27,490 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:27,491 : INFO : built Dictionary(28 unique tokens: ['shop', 'get', 'air', 'closed', 'port']...) from 2 documents (total 35 corpus positions)\n", + "2018-09-11 22:27:27,499 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:27,500 : INFO : built Dictionary(33 unique tokens: ['shop', 'get', 'countries', 'gaining', 'gulf']...) from 2 documents (total 46 corpus positions)\n", + "2018-09-11 22:27:27,512 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:27,513 : INFO : built Dictionary(30 unique tokens: ['shop', 'get', 'ive', 'qatar', 'possible']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:27:27,523 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:27,524 : INFO : built Dictionary(28 unique tokens: ['shop', 'get', 'ive', 'opening', 'one']...) 
from 2 documents (total 37 corpus positions)\n", + "2018-09-11 22:27:27,532 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:27,533 : INFO : built Dictionary(39 unique tokens: ['wanna', 'shop', 'problem', 'ive', 'ok']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:27:27,548 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:27,549 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:27,550 : INFO : built Dictionary(35 unique tokens: ['shop', 'get', 'people', 'ive', 'business']...) from 2 documents (total 47 corpus positions)\n", + "2018-09-11 22:27:27,562 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:28,480 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:28,482 : INFO : built Dictionary(26 unique tokens: ['wife', 'park', 'qatar', 'tell', 'thanks']...) from 2 documents (total 30 corpus positions)\n", + "2018-09-11 22:27:28,489 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:28,490 : INFO : built Dictionary(50 unique tokens: ['track', 'round', 'ok', 'alone', 'good']...) from 2 documents (total 58 corpus positions)\n", + "2018-09-11 22:27:28,509 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:28,510 : INFO : built Dictionary(21 unique tokens: ['plz', 'viewpoint', 'name', 'private', 'near']...) from 2 documents (total 26 corpus positions)\n", + "2018-09-11 22:27:28,515 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:28,516 : INFO : built Dictionary(18 unique tokens: ['best', 'alkhor', 'qatar', 'place', 'near']...) from 2 documents (total 23 corpus positions)\n", + "2018-09-11 22:27:28,519 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:28,520 : INFO : built Dictionary(32 unique tokens: ['info', 'planet', 'already', 'tell', 'thanks']...) from 2 documents (total 37 corpus positions)\n", + "2018-09-11 22:27:28,530 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:28,531 : INFO : built Dictionary(18 unique tokens: ['alkhor', 'name', 'private', 'chill', 'tell']...) from 2 documents (total 24 corpus positions)\n", + "2018-09-11 22:27:28,534 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:28,535 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:28,536 : INFO : built Dictionary(23 unique tokens: ['park', 'dosas', 'thanks', 'one', 'dosa']...) from 2 documents (total 25 corpus positions)\n", + "2018-09-11 22:27:28,542 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:28,543 : INFO : built Dictionary(31 unique tokens: ['park', 'tell', 'thanks', 'marked', 'wear']...) from 2 documents (total 37 corpus positions)\n", + "2018-09-11 22:27:28,553 : INFO : Removed 2 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:28,553 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:28,554 : INFO : built Dictionary(27 unique tokens: ['park', 'troll', 'thanks', 'gamers', 'hello']...) 
from 2 documents (total 31 corpus positions)\n", + "2018-09-11 22:27:28,562 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:28,563 : INFO : built Dictionary(35 unique tokens: ['wife', 'work', 'advantage', 'thanks', 'park']...) from 2 documents (total 47 corpus positions)\n", + "2018-09-11 22:27:28,575 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:29,234 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:29,235 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:29,236 : INFO : built Dictionary(42 unique tokens: ['arab', 'get', 'baldiya', 'knows', 'visit']...) from 2 documents (total 53 corpus positions)\n", + "2018-09-11 22:27:29,252 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:29,253 : INFO : built Dictionary(36 unique tokens: ['arab', 'busy', 'schedule', 'wage', 'per']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:27:29,265 : INFO : Removed 2 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:29,266 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:29,267 : INFO : built Dictionary(34 unique tokens: ['especially', 'arab', 'get', 'knows', 'thoub']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:27:29,278 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:29,279 : INFO : built Dictionary(33 unique tokens: ['arab', 'qatar', 'email', 'knows', 'could']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:27:29,289 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:29,290 : INFO : built Dictionary(33 unique tokens: ['arab', 'qatar', 'knows', 'thanks', 'kinda']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:27:29,300 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:29,301 : INFO : built Dictionary(57 unique tokens: ['childrens', 'arab', 'busy', 'spectrum', 'second']...) from 2 documents (total 62 corpus positions)\n", + "2018-09-11 22:27:29,326 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:29,327 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:29,328 : INFO : built Dictionary(40 unique tokens: ['wife', 'qatar', 'cover', 'knees', 'usa']...) from 2 documents (total 42 corpus positions)\n", + "2018-09-11 22:27:29,342 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:29,343 : INFO : built Dictionary(39 unique tokens: ['arab', 'body', 'nurseries', 'knows', 'extreme']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:27:29,356 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:29,357 : INFO : built Dictionary(29 unique tokens: ['friend', 'arab', 'qatar', 'gulf', 'busy']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:27:29,364 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:29,365 : INFO : built Dictionary(31 unique tokens: ['arab', 'busy', 'people', 'bit', 'else']...) 
from 2 documents (total 34 corpus positions)\n", + "2018-09-11 22:27:29,374 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:30,222 : INFO : Removed 2 and 0 OOV words from document 1 and 2 (respectively).\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:27:30,223 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:30,225 : INFO : built Dictionary(26 unique tokens: ['mess', 'better', 'thank', 'one', 'food']...) from 2 documents (total 37 corpus positions)\n", + "2018-09-11 22:27:30,233 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:30,234 : INFO : built Dictionary(30 unique tokens: ['mess', 'work', 'drive', 'area', 'guys']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:27:30,243 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:30,244 : INFO : built Dictionary(32 unique tokens: ['mess', 'korean', 'thanks', 'thank', 'please']...) from 2 documents (total 46 corpus positions)\n", + "2018-09-11 22:27:30,254 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:30,255 : INFO : built Dictionary(41 unique tokens: ['mess', 'shop', 'also', 'opening', 'let']...) from 2 documents (total 58 corpus positions)\n", + "2018-09-11 22:27:30,269 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:30,270 : INFO : built Dictionary(24 unique tokens: ['mess', 'thanks', 'could', 'lunch', 'hello']...) from 2 documents (total 29 corpus positions)\n", + "2018-09-11 22:27:30,276 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:30,277 : INFO : built Dictionary(27 unique tokens: ['mess', 'get', 'would', 'real', 'pre']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:27:30,284 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:30,285 : INFO : built Dictionary(41 unique tokens: ['mess', 'getting', 'happens', 'means', 'thank']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:27:30,298 : INFO : Removed 2 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:30,299 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:30,300 : INFO : built Dictionary(36 unique tokens: ['ask', 'mess', 'mumbai', 'know', 'manager']...) from 2 documents (total 47 corpus positions)\n", + "2018-09-11 22:27:30,311 : INFO : Removed 2 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:30,312 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:30,313 : INFO : built Dictionary(33 unique tokens: ['mess', 'said', 'air', 'baggage', 'per']...) from 2 documents (total 43 corpus positions)\n", + "2018-09-11 22:27:30,322 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:30,323 : INFO : built Dictionary(36 unique tokens: ['friend', 'info', 'know', 'guys', 'past']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:27:30,334 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:31,043 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:31,045 : INFO : built Dictionary(44 unique tokens: ['speaking', 'maybe', 'hired', 'u', 'visa']...) 
from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:27:31,061 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:31,062 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:31,063 : INFO : built Dictionary(35 unique tokens: ['kind', 'get', 'opening', 'visit', 'thanks']...) from 2 documents (total 42 corpus positions)\n", + "2018-09-11 22:27:31,075 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:31,076 : INFO : built Dictionary(36 unique tokens: ['getting', 'get', 'would', 'bayt', 'better']...) from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:27:31,088 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:31,089 : INFO : built Dictionary(37 unique tokens: ['proceed', 'get', 'recently', 'thanks', 'visit']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:27:31,101 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:31,102 : INFO : built Dictionary(38 unique tokens: ['get', 'pls', 'tell', 'thanks', 'past']...) from 2 documents (total 47 corpus positions)\n", + "2018-09-11 22:27:31,115 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:31,116 : INFO : built Dictionary(36 unique tokens: ['loans', 'get', 'insurance', 'release', 'visit']...) from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:27:31,129 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:31,130 : INFO : built Dictionary(37 unique tokens: ['get', 'website', 'bayt', 'passport', 'thanks']...) from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:27:31,142 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:31,143 : INFO : built Dictionary(44 unique tokens: ['manager', 'hired', 'safe', 'somebody', 'dinners']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:27:31,159 : INFO : Removed 2 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:31,160 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:31,160 : INFO : built Dictionary(18 unique tokens: ['get', 'visa', 'visit', 'thanks', 'aravind']...) from 2 documents (total 25 corpus positions)\n", + "2018-09-11 22:27:31,164 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:31,165 : INFO : built Dictionary(49 unique tokens: ['work', 'actually', 'valid', 'indians', 'oil']...) from 2 documents (total 66 corpus positions)\n", + "2018-09-11 22:27:31,183 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:31,985 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:31,986 : INFO : built Dictionary(47 unique tokens: ['best', 'healthy', 'reduce', 'please', 'food']...) from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:27:32,004 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:32,005 : INFO : built Dictionary(40 unique tokens: ['friend', 'info', 'know', 'need', 'past']...) from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:27:32,019 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:32,020 : INFO : built Dictionary(30 unique tokens: ['reduce', 'days', 'obesity', 'one', 'loosing']...) 
from 2 documents (total 34 corpus positions)\n", + "2018-09-11 22:27:32,028 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:32,029 : INFO : built Dictionary(55 unique tokens: ['ask', 'constantly', 'day', 'lost', 'pregnancy']...) from 2 documents (total 66 corpus positions)\n", + "2018-09-11 22:27:32,051 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:32,052 : INFO : built Dictionary(20 unique tokens: ['reduce', 'need', 'obesity', 'products', 'training']...) from 2 documents (total 24 corpus positions)\n", + "2018-09-11 22:27:32,056 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:32,057 : INFO : built Dictionary(24 unique tokens: ['reduce', 'weight', 'please', 'information', 'tried']...) from 2 documents (total 34 corpus positions)\n", + "2018-09-11 22:27:32,064 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:32,065 : INFO : built Dictionary(41 unique tokens: ['reduce', 'body', 'step', 'fasten', 'height']...) from 2 documents (total 53 corpus positions)\n", + "2018-09-11 22:27:32,080 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:32,081 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:32,082 : INFO : built Dictionary(34 unique tokens: ['work', 'insight', 'healthy', 'past', 'suffering']...) from 2 documents (total 46 corpus positions)\n", + "2018-09-11 22:27:32,093 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:32,094 : INFO : built Dictionary(26 unique tokens: ['obese', 'reduce', 'need', 'bread', 'right']...) from 2 documents (total 33 corpus positions)\n", + "2018-09-11 22:27:32,102 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:32,103 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:32,103 : INFO : built Dictionary(26 unique tokens: ['reduce', 'pls', 'women', 'suffering', 'weight']...) from 2 documents (total 34 corpus positions)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:27:32,112 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:32,872 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:32,874 : INFO : built Dictionary(31 unique tokens: ['ask', 'permit', 'qatar', 'would', 'wife']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:27:32,882 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:32,882 : INFO : built Dictionary(32 unique tokens: ['wife', 'work', 'qatar', 'steps', 'n']...) from 2 documents (total 56 corpus positions)\n", + "2018-09-11 22:27:32,894 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:32,895 : INFO : built Dictionary(42 unique tokens: ['ask', 'work', 'get', 'would', 'wife']...) from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:27:32,914 : INFO : Removed 2 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:32,915 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:32,916 : INFO : built Dictionary(50 unique tokens: ['ask', 'permit', 'n', 'said', 'usually']...) 
from 2 documents (total 66 corpus positions)\n", + "2018-09-11 22:27:32,942 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:32,943 : INFO : built Dictionary(41 unique tokens: ['ask', 'permit', 'get', 'expat', 'wife']...) from 2 documents (total 63 corpus positions)\n", + "2018-09-11 22:27:32,962 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:32,963 : INFO : built Dictionary(45 unique tokens: ['ask', 'permit', 'would', 'need', 'n']...) from 2 documents (total 68 corpus positions)\n", + "2018-09-11 22:27:32,986 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:32,987 : INFO : built Dictionary(30 unique tokens: ['ask', 'permit', 'get', 'would', 'c']...) from 2 documents (total 46 corpus positions)\n", + "2018-09-11 22:27:32,993 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:32,994 : INFO : built Dictionary(51 unique tokens: ['info', 'ask', 'know', 'interested', 'n']...) from 2 documents (total 65 corpus positions)\n", + "2018-09-11 22:27:33,018 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:33,019 : INFO : built Dictionary(48 unique tokens: ['ask', 'permit', 'n', 'changing', 'manager']...) from 2 documents (total 69 corpus positions)\n", + "2018-09-11 22:27:33,041 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:33,042 : INFO : built Dictionary(45 unique tokens: ['ask', 'permit', 'almost', 'work', 'second']...) from 2 documents (total 66 corpus positions)\n", + "2018-09-11 22:27:33,062 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:33,758 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:33,759 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:33,760 : INFO : built Dictionary(53 unique tokens: ['trouble', 'plzzzzzzzzzzzzzzz', 'know', 'switch', 'please']...) from 2 documents (total 70 corpus positions)\n", + "2018-09-11 22:27:33,786 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:33,787 : INFO : built Dictionary(33 unique tokens: ['switch', 'echo', 'could', 'yes', 'duster']...) from 2 documents (total 42 corpus positions)\n", + "2018-09-11 22:27:33,797 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:33,798 : INFO : built Dictionary(36 unique tokens: ['select', 'honda', 'switch', 'better', 'could']...) from 2 documents (total 54 corpus positions)\n", + "2018-09-11 22:27:33,812 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:33,813 : INFO : built Dictionary(30 unique tokens: ['honda', 'switch', 'resale', 'value', 'could']...) from 2 documents (total 43 corpus positions)\n", + "2018-09-11 22:27:33,822 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:33,823 : INFO : built Dictionary(47 unique tokens: ['best', 'know', 'switch', 'n', 'hyundai']...) from 2 documents (total 64 corpus positions)\n", + "2018-09-11 22:27:33,844 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:33,845 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:33,846 : INFO : built Dictionary(36 unique tokens: ['wanna', 'switch', 'left', 'p', 'could']...) 
from 2 documents (total 46 corpus positions)\n", + "2018-09-11 22:27:33,858 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:33,859 : INFO : built Dictionary(36 unique tokens: ['qatar', 'honda', 'switch', 'better', 'could']...) from 2 documents (total 53 corpus positions)\n", + "2018-09-11 22:27:33,873 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:33,874 : INFO : built Dictionary(34 unique tokens: ['switch', 'left', 'could', 'need', 'consumption']...) from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:27:33,887 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:33,888 : INFO : built Dictionary(47 unique tokens: ['suzuki', 'accent', 'switch', 'following', 'value']...) from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:27:33,910 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:33,911 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:33,912 : INFO : built Dictionary(51 unique tokens: ['jams', 'switch', 'better', 'r', 'please']...) from 2 documents (total 67 corpus positions)\n", + "2018-09-11 22:27:33,937 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:34,738 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:34,740 : INFO : built Dictionary(38 unique tokens: ['exactly', 'government', 'coz', 'mineral', 'cost']...) from 2 documents (total 45 corpus positions)\n", + "2018-09-11 22:27:34,755 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:34,756 : INFO : built Dictionary(39 unique tokens: ['flavours', 'waqif', 'seem', 'apple', 'exactly']...) from 2 documents (total 45 corpus positions)\n", + "2018-09-11 22:27:34,771 : INFO : Removed 4 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:34,772 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:34,773 : INFO : built Dictionary(31 unique tokens: ['outrageously', 'exactly', 'talking', 'area', 'experienced']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:27:34,783 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:34,784 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:34,785 : INFO : built Dictionary(48 unique tokens: ['allowance', 'old', 'fees', 'food', 'offered']...) from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:27:34,804 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:34,806 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:34,806 : INFO : built Dictionary(42 unique tokens: ['exactly', 'get', 'allow', 'knows', 'buy']...) from 2 documents (total 46 corpus positions)\n", + "2018-09-11 22:27:34,822 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:34,823 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:34,824 : INFO : built Dictionary(26 unique tokens: ['group', 'url_token', 'thanks', 'fellow', 'exactly']...) 
from 2 documents (total 30 corpus positions)\n", + "2018-09-11 22:27:34,831 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:34,832 : INFO : built Dictionary(24 unique tokens: ['brings', 'waqif', 'angel', 'souq', 'exactly']...) from 2 documents (total 27 corpus positions)\n", + "2018-09-11 22:27:34,838 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:34,839 : INFO : built Dictionary(25 unique tokens: ['waqif', 'exactly', 'dont', 'possible', 'including']...) from 2 documents (total 28 corpus positions)\n", + "2018-09-11 22:27:34,846 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:34,847 : INFO : built Dictionary(31 unique tokens: ['qatar', 'waqif', 'hours', 'thanks', 'soon']...) from 2 documents (total 39 corpus positions)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:27:34,857 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:34,858 : INFO : built Dictionary(46 unique tokens: ['aside', 'dance', 'wicked', 'cool', 'community']...) from 2 documents (total 56 corpus positions)\n", + "2018-09-11 22:27:34,875 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:35,605 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:35,607 : INFO : built Dictionary(21 unique tokens: ['qatar', 'pls', 'psychologist', 'children', 'child']...) from 2 documents (total 27 corpus positions)\n", + "2018-09-11 22:27:35,612 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:35,613 : INFO : built Dictionary(50 unique tokens: ['childrens', 'spectrum', 'second', 'son', 'moving']...) from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:27:35,631 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:35,632 : INFO : built Dictionary(20 unique tokens: ['qatar', 'know', 'counselling', 'children', 'pediatric']...) from 2 documents (total 28 corpus positions)\n", + "2018-09-11 22:27:35,637 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:35,638 : INFO : built Dictionary(33 unique tokens: ['within', 'qatar', 'expat', 'certainly', 'professionals']...) from 2 documents (total 37 corpus positions)\n", + "2018-09-11 22:27:35,648 : INFO : Removed 3 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:35,649 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:35,650 : INFO : built Dictionary(38 unique tokens: ['qatar', 'handicaps', 'thank', 'please', 'response']...) from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:27:35,663 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:35,664 : INFO : built Dictionary(45 unique tokens: ['know', 'providers', 'counseling', 'thank', 'please']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:27:35,679 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:35,680 : INFO : built Dictionary(40 unique tokens: ['anxious', 'qatar', 'pls', 'nurse', 'per']...) from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:27:35,694 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:35,695 : INFO : built Dictionary(37 unique tokens: ['ahli', 'fluent', 'speaks', 'doctor', 'insurance']...) 
from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:27:35,707 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:35,708 : INFO : built Dictionary(35 unique tokens: ['freaked', 'qatar', 'need', 'honest', 'forward']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:27:35,719 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:35,720 : INFO : built Dictionary(35 unique tokens: ['confidence', 'pageant', 'qatar', 'people', 'beauty']...) from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:27:35,731 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:36,582 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:36,584 : INFO : built Dictionary(17 unique tokens: ['driving', 'drive', 'take', 'departure', 'company']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:27:36,593 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:36,594 : INFO : built Dictionary(22 unique tokens: ['qatar', 'yes', 'rent', 'good', 'car']...) from 2 documents (total 29 corpus positions)\n", + "2018-09-11 22:27:36,599 : INFO : Removed 2 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:36,600 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:36,601 : INFO : built Dictionary(55 unique tokens: ['drive', 'ir', 'thr', 'qatar', 'keep']...) from 2 documents (total 67 corpus positions)\n", + "2018-09-11 22:27:36,626 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:36,627 : INFO : built Dictionary(23 unique tokens: ['drive', 'right', 'yes', 'rent', 'credential']...) from 2 documents (total 29 corpus positions)\n", + "2018-09-11 22:27:36,632 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:36,632 : INFO : built Dictionary(31 unique tokens: ['drive', 'authenticate', 'thanks', 'yes', 'long']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:27:36,642 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:36,643 : INFO : built Dictionary(28 unique tokens: ['name', 'drive', 'yes', 'rent', 'engineer']...) from 2 documents (total 39 corpus positions)\n", + "2018-09-11 22:27:36,651 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:36,652 : INFO : built Dictionary(22 unique tokens: ['qatar', 'yes', 'rent', 'travel', 'drive']...) from 2 documents (total 30 corpus positions)\n", + "2018-09-11 22:27:36,656 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:36,657 : INFO : built Dictionary(27 unique tokens: ['drive', 'air', 'yes', 'landing', 'rent']...) from 2 documents (total 32 corpus positions)\n", + "2018-09-11 22:27:36,664 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:36,665 : INFO : built Dictionary(26 unique tokens: ['drive', 'passport', 'even', 'cancel', 'rent']...) from 2 documents (total 34 corpus positions)\n", + "2018-09-11 22:27:36,671 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:36,672 : INFO : built Dictionary(49 unique tokens: ['ask', 'wanna', 'drive', 'expired', 'back']...) 
from 2 documents (total 70 corpus positions)\n", + "2018-09-11 22:27:36,692 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:37,370 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:37,372 : INFO : built Dictionary(43 unique tokens: ['resign', 'current', 'qatar', 'pls', 'permit']...) from 2 documents (total 58 corpus positions)\n", + "2018-09-11 22:27:37,389 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:37,390 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:37,391 : INFO : built Dictionary(46 unique tokens: ['resign', 'resignation', 'pls', 'qatari', 'contract']...) from 2 documents (total 58 corpus positions)\n", + "2018-09-11 22:27:37,410 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:37,411 : INFO : built Dictionary(30 unique tokens: ['resign', 'etc', 'qatar', 'need', 'thanks']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:27:37,421 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:37,422 : INFO : built Dictionary(45 unique tokens: ['entering', 'permit', 'resignation', 'work', 'resign']...) from 2 documents (total 60 corpus positions)\n", + "2018-09-11 22:27:37,440 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:37,441 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:37,442 : INFO : built Dictionary(46 unique tokens: ['resign', 'resignation', 'allow', 'gratuity', 'benefits']...) from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:27:37,460 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:37,461 : INFO : built Dictionary(47 unique tokens: ['resign', 'work', 'resignation', 'back', 'shall']...) from 2 documents (total 60 corpus positions)\n", + "2018-09-11 22:27:37,480 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:37,481 : INFO : built Dictionary(52 unique tokens: ['wanna', 'work', 'resignation', 'actually', 'meâ']...) from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:27:37,501 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:37,502 : INFO : built Dictionary(36 unique tokens: ['ask', 'resign', 'get', 'gratuity', 'back']...) from 2 documents (total 43 corpus positions)\n", + "2018-09-11 22:27:37,514 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:37,515 : INFO : built Dictionary(37 unique tokens: ['resign', 'work', 'resignation', 'back', 'could']...) from 2 documents (total 43 corpus positions)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:27:37,527 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:37,528 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:37,529 : INFO : built Dictionary(45 unique tokens: ['resign', 'resignation', 'please', 'contract', 'months']...) from 2 documents (total 54 corpus positions)\n", + "2018-09-11 22:27:37,546 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:38,383 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:38,385 : INFO : built Dictionary(53 unique tokens: ['open', 'outlandish', 'food', 'offered', 'house']...) 
from 2 documents (total 65 corpus positions)\n", + "2018-09-11 22:27:38,412 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:38,413 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:38,414 : INFO : built Dictionary(45 unique tokens: ['shape', 'full', 'qatar', 'food', 'house']...) from 2 documents (total 53 corpus positions)\n", + "2018-09-11 22:27:38,433 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:38,434 : INFO : built Dictionary(49 unique tokens: ['goods', 'shipments', 'need', 'morning', 'moving']...) from 2 documents (total 60 corpus positions)\n", + "2018-09-11 22:27:38,457 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:38,458 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:38,459 : INFO : built Dictionary(60 unique tokens: ['morning', 'pants', 'wear', 'qatar', 'food']...) from 2 documents (total 67 corpus positions)\n", + "2018-09-11 22:27:38,492 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:38,493 : INFO : built Dictionary(51 unique tokens: ['invited', 'morning', 'qatari', 'happens', 'wear']...) from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:27:38,517 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:38,518 : INFO : built Dictionary(48 unique tokens: ['ok', 'wear', 'food', 'trousers', 'question']...) from 2 documents (total 54 corpus positions)\n", + "2018-09-11 22:27:38,539 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:38,540 : INFO : built Dictionary(34 unique tokens: ['packed', 'ok', 'drink', 'soon', 'puts']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:27:38,549 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:38,550 : INFO : built Dictionary(44 unique tokens: ['ship', 'back', 'altima', 'india', 'offered']...) from 2 documents (total 52 corpus positions)\n", + "2018-09-11 22:27:38,567 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:38,568 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:38,569 : INFO : built Dictionary(36 unique tokens: ['shop', 'qatar', 'spinney', 'drink', 'topic']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:27:38,578 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:38,579 : INFO : built Dictionary(54 unique tokens: ['wife', 'drive', 'would', 'tell', 'effects']...) from 2 documents (total 69 corpus positions)\n", + "2018-09-11 22:27:38,605 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:39,186 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:39,187 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:39,188 : INFO : built Dictionary(47 unique tokens: ['rules', 'sponsorship', 'please', 'appreciated', 'months']...) from 2 documents (total 63 corpus positions)\n", + "2018-09-11 22:27:39,210 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:39,212 : INFO : built Dictionary(36 unique tokens: ['current', 'get', 'thanks', 'right', 'long']...) 
from 2 documents (total 47 corpus positions)\n", + "2018-09-11 22:27:39,225 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:39,226 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:39,227 : INFO : built Dictionary(47 unique tokens: ['kind', 'day', 'birth', 'required', 'months']...) from 2 documents (total 65 corpus positions)\n", + "2018-09-11 22:27:39,249 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:39,250 : INFO : built Dictionary(39 unique tokens: ['wife', 'current', 'qatar', 'civil', 'even']...) from 2 documents (total 58 corpus positions)\n", + "2018-09-11 22:27:39,267 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:39,268 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:39,268 : INFO : built Dictionary(35 unique tokens: ['current', 'entry', 'thanks', 'valid', 'sponsorship']...) from 2 documents (total 43 corpus positions)\n", + "2018-09-11 22:27:39,281 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:39,282 : INFO : built Dictionary(49 unique tokens: ['ask', 'sponsorship', 'evening', 'went', 'please']...) from 2 documents (total 64 corpus positions)\n", + "2018-09-11 22:27:39,304 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:39,305 : INFO : built Dictionary(29 unique tokens: ['work', 'get', 'thanks', 'current', 'transfer']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:27:39,316 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:39,317 : INFO : built Dictionary(46 unique tokens: ['totally', 'transfer', 'soon', 'please', 'lebanese']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:27:39,335 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:39,336 : INFO : built Dictionary(49 unique tokens: ['aside', 'form', 'transfer', 'please', 'ur']...) from 2 documents (total 58 corpus positions)\n", + "2018-09-11 22:27:39,358 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:39,359 : INFO : built Dictionary(43 unique tokens: ['ask', 'wife', 'qatar', 'would', 'thanks']...) from 2 documents (total 70 corpus positions)\n", + "2018-09-11 22:27:39,379 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:40,278 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:40,279 : INFO : built Dictionary(50 unique tokens: ['said', 'saudi', 'ok', 'work', 'expat']...) from 2 documents (total 64 corpus positions)\n", + "2018-09-11 22:27:40,306 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:40,307 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:40,308 : INFO : built Dictionary(46 unique tokens: ['work', 'cover', 'knees', 'ok', 'october']...) from 2 documents (total 57 corpus positions)\n", + "2018-09-11 22:27:40,329 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:40,331 : INFO : built Dictionary(50 unique tokens: ['work', 'read', 'ok', 'wear', 'qatar']...) 
from 2 documents (total 60 corpus positions)\n", + "2018-09-11 22:27:40,355 : INFO : Removed 2 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:40,356 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:40,357 : INFO : built Dictionary(41 unique tokens: ['especially', 'qatar', 'get', 'ive', 'ok']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:27:40,373 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:40,374 : INFO : built Dictionary(34 unique tokens: ['wife', 'work', 'qatar', 'ive', 'woman']...) from 2 documents (total 43 corpus positions)\n", + "2018-09-11 22:27:40,385 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:40,386 : INFO : built Dictionary(44 unique tokens: ['arab', 'work', 'curious', 'wear', 'others']...) from 2 documents (total 56 corpus positions)\n", + "2018-09-11 22:27:40,404 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:40,405 : INFO : built Dictionary(46 unique tokens: ['cold', 'lool', 'ok', 'work', 'chilly']...) from 2 documents (total 56 corpus positions)\n", + "2018-09-11 22:27:40,426 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:27:40,427 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:40,428 : INFO : built Dictionary(57 unique tokens: ['work', 'ok', 'pants', 'wear', 'qatar']...) from 2 documents (total 69 corpus positions)\n", + "2018-09-11 22:27:40,461 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:40,462 : INFO : built Dictionary(50 unique tokens: ['said', 'countries', 'ok', 'work', 'attitiude']...) from 2 documents (total 62 corpus positions)\n", + "2018-09-11 22:27:40,486 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:40,487 : INFO : built Dictionary(55 unique tokens: ['work', 'know', 'ok', 'qatari', 'wear']...) from 2 documents (total 68 corpus positions)\n", + "2018-09-11 22:27:40,517 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:41,022 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:41,023 : INFO : built Dictionary(33 unique tokens: ['info', 'within', 'moving', 'would', 'thanks']...) from 2 documents (total 42 corpus positions)\n", + "2018-09-11 22:27:41,033 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:41,034 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:41,035 : INFO : built Dictionary(47 unique tokens: ['know', 'feed', 'either', 'care', 'day']...) from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:27:41,052 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:41,053 : INFO : built Dictionary(27 unique tokens: ['within', 'qatar', 'employers', 'hello', 'searching']...) from 2 documents (total 34 corpus positions)\n", + "2018-09-11 22:27:41,060 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:41,061 : INFO : built Dictionary(47 unique tokens: ['would', 'care', 'get', 'information', 'advise']...) 
from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:27:41,078 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:41,079 : INFO : built Dictionary(40 unique tokens: ['preschool', 'intend', 'get', 'group', 'hello']...) from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:27:41,093 : INFO : Removed 3 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:41,094 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:41,094 : INFO : built Dictionary(40 unique tokens: ['within', 'qatar', 'handicaps', 'care', 'wanted']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:27:41,108 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:41,109 : INFO : built Dictionary(37 unique tokens: ['qatar', 'dies', 'know', 'dozed', 'afternoon']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:27:41,121 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:41,122 : INFO : built Dictionary(29 unique tokens: ['wife', 'work', 'qatar', 'within', 'hello']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:27:41,130 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:41,130 : INFO : built Dictionary(52 unique tokens: ['childrens', 'spectrum', 'second', 'old', 'care']...) from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:27:41,149 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:41,150 : INFO : built Dictionary(38 unique tokens: ['within', 'qatar', 'old', 'care', 'cost']...) from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:27:41,163 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:42,190 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:42,192 : INFO : built Dictionary(12 unique tokens: ['kat', 'attempt', 'east', 'know', 'middle']...) from 2 documents (total 30 corpus positions)\n", + "2018-09-11 22:27:42,197 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:42,198 : INFO : built Dictionary(29 unique tokens: ['drive', 'thanks', 'soon', 'east', 'qatar']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:27:42,207 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:42,208 : INFO : built Dictionary(32 unique tokens: ['best', 'make', 'qatar', 'know', 'thanks']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:27:42,218 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:42,219 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:42,220 : INFO : built Dictionary(38 unique tokens: ['qatar', 'people', 'n', 'q', 'time']...) from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:27:42,232 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:42,233 : INFO : built Dictionary(19 unique tokens: ['kat', 'middle', 'url_token', 'knows', 'attempt']...) from 2 documents (total 25 corpus positions)\n", + "2018-09-11 22:27:42,237 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:42,238 : INFO : built Dictionary(32 unique tokens: ['ask', 'mom', 'heard', 'went', 'said']...) 
from 2 documents (total 47 corpus positions)\n", + "2018-09-11 22:27:42,249 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:42,250 : INFO : built Dictionary(17 unique tokens: ['driving', 'kat', 'middle', 'attempt', 'know']...) from 2 documents (total 21 corpus positions)\n", + "2018-09-11 22:27:42,253 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:42,254 : INFO : built Dictionary(36 unique tokens: ['ask', 'qatar', 'marijuana', 'could', 'jail']...) from 2 documents (total 46 corpus positions)\n", + "2018-09-11 22:27:42,266 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:42,267 : INFO : built Dictionary(23 unique tokens: ['group', 'study', 'max', 'preferrably', 'arabic']...) from 2 documents (total 28 corpus positions)\n", + "2018-09-11 22:27:42,273 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:42,273 : INFO : built Dictionary(30 unique tokens: ['kat', 'comments', 'looting', 'please', 'east']...) from 2 documents (total 38 corpus positions)\n", + "2018-09-11 22:27:42,282 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:42,807 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:42,809 : INFO : built Dictionary(46 unique tokens: ['said', 'qar', 'moving', 'k', 'asked']...) from 2 documents (total 53 corpus positions)\n", + "2018-09-11 22:27:42,826 : INFO : Removed 2 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:42,827 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:42,828 : INFO : built Dictionary(40 unique tokens: ['hospitals', 'also', 'insurance', 'thanks', 'upon']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:27:42,844 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:42,845 : INFO : built Dictionary(49 unique tokens: ['info', 'read', 'rp', 'card', 'first']...) from 2 documents (total 66 corpus positions)\n", + "2018-09-11 22:27:42,865 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:42,866 : INFO : built Dictionary(39 unique tokens: ['ranges', 'proceed', 'qatar', 'employment', 'insurance']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:27:42,879 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:42,880 : INFO : built Dictionary(28 unique tokens: ['native', 'treatment', 'insurance', 'also', 'free']...) from 2 documents (total 38 corpus positions)\n", + "2018-09-11 22:27:42,889 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:42,890 : INFO : built Dictionary(35 unique tokens: ['info', 'dental', 'read', 'prices', 'travel']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:27:42,902 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:42,903 : INFO : built Dictionary(35 unique tokens: ['proceed', 'cover', 'tested', 'companies', 'opticians']...) 
from 2 documents (total 39 corpus positions)\n", + "2018-09-11 22:27:42,916 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:27:42,916 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:42,917 : INFO : built Dictionary(46 unique tokens: ['section', 'c', 'labour', 'deliver', 'care']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:27:42,935 : INFO : Removed 2 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:42,936 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:42,937 : INFO : built Dictionary(31 unique tokens: ['make', 'suggestions', 'closed', 'insurance', 'prices']...) from 2 documents (total 33 corpus positions)\n", + "2018-09-11 22:27:42,946 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:42,947 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:42,948 : INFO : built Dictionary(38 unique tokens: ['proceed', 'get', 'suggestions', 'better', 'thanks']...) from 2 documents (total 45 corpus positions)\n", + "2018-09-11 22:27:42,961 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:43,961 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:43,963 : INFO : built Dictionary(11 unique tokens: ['ur', 'decide', 'got', 'x', 'experts']...) from 2 documents (total 22 corpus positions)\n", + "2018-09-11 22:27:43,967 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:43,968 : INFO : built Dictionary(25 unique tokens: ['qatar', 'deliver', 'mins', 'one', 'experts']...) from 2 documents (total 25 corpus positions)\n", + "2018-09-11 22:27:43,974 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:43,975 : INFO : built Dictionary(27 unique tokens: ['qatar', 'thanks', 'mins', 'one', 'experts']...) from 2 documents (total 29 corpus positions)\n", + "2018-09-11 22:27:43,983 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:43,984 : INFO : built Dictionary(35 unique tokens: ['said', 'break', 'know', 'ur', 'height']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:27:43,994 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:43,995 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:43,996 : INFO : built Dictionary(44 unique tokens: ['cover', 'protect', 'tragic', 'q', 'ur']...) from 2 documents (total 47 corpus positions)\n", + "2018-09-11 22:27:44,010 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:44,011 : INFO : built Dictionary(38 unique tokens: ['especially', 'ql', 'htc', 'thanks', 'mins']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:27:44,022 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:44,023 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:44,024 : INFO : built Dictionary(30 unique tokens: ['hospitals', 'better', 'mins', 'care', 'fans']...) 
from 2 documents (total 32 corpus positions)\n", + "2018-09-11 22:27:44,033 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:44,033 : INFO : built Dictionary(33 unique tokens: ['interesting', 'equality', 'htc', 'mins', 'achieved']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:27:44,043 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:44,044 : INFO : built Dictionary(35 unique tokens: ['qatar', 'haram', 'dare', 'per', 'drink']...) from 2 documents (total 37 corpus positions)\n", + "2018-09-11 22:27:44,054 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:44,055 : INFO : built Dictionary(36 unique tokens: ['ql', 'starting', 'mins', 'social', 'things']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:27:44,065 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:44,631 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:44,632 : INFO : built Dictionary(23 unique tokens: ['within', 'almost', 'gives', 'quiries', 'finish']...) from 2 documents (total 47 corpus positions)\n", + "2018-09-11 22:27:44,638 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:44,639 : INFO : built Dictionary(12 unique tokens: ['company', 'gives', 'sponsorship', 'advise', 'transfer']...) from 2 documents (total 21 corpus positions)\n", + "2018-09-11 22:27:44,642 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:44,643 : INFO : built Dictionary(30 unique tokens: ['work', 'problem', 'pay', 'gives', 'qatar']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:27:44,650 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:44,650 : INFO : built Dictionary(14 unique tokens: ['temporary', 'company', 'gives', 'transfer', 'mean']...) from 2 documents (total 23 corpus positions)\n", + "2018-09-11 22:27:44,654 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:44,654 : INFO : built Dictionary(36 unique tokens: ['get', 'need', 'gives', 'transfer', 'thank']...) from 2 documents (total 58 corpus positions)\n", + "2018-09-11 22:27:44,663 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:44,664 : INFO : built Dictionary(18 unique tokens: ['got', 'sponsership', 'provided', 'months', 'company']...) from 2 documents (total 33 corpus positions)\n", + "2018-09-11 22:27:44,668 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:44,669 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:44,670 : INFO : built Dictionary(40 unique tokens: ['resign', 'within', 'pls', 'per', 'gives']...) from 2 documents (total 52 corpus positions)\n", + "2018-09-11 22:27:44,680 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:44,681 : INFO : built Dictionary(23 unique tokens: ['within', 'moving', 'pack', 'gives', 'possible']...) from 2 documents (total 34 corpus positions)\n", + "2018-09-11 22:27:44,686 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:44,687 : INFO : built Dictionary(34 unique tokens: ['promised', 'recently', 'release', 'gives', 'liabilities']...) 
from 2 documents (total 47 corpus positions)\n", + "2018-09-11 22:27:44,694 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:44,695 : INFO : built Dictionary(31 unique tokens: ['body', 'administration', 'changing', 'manager', 'transfer']...) from 2 documents (total 45 corpus positions)\n", + "2018-09-11 22:27:44,702 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:45,740 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:45,742 : INFO : built Dictionary(35 unique tokens: ['shop', 'get', 'samosas', 'area', 'thanks']...) from 2 documents (total 78 corpus positions)\n", + "2018-09-11 22:27:45,777 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:45,778 : INFO : built Dictionary(47 unique tokens: ['shop', 'trace', 'small', 'festival', 'anybody']...) from 2 documents (total 58 corpus positions)\n", + "2018-09-11 22:27:45,799 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:45,800 : INFO : built Dictionary(72 unique tokens: ['wife', 'know', 'else', 'trace', 'small']...) from 2 documents (total 81 corpus positions)\n", + "2018-09-11 22:27:45,852 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:45,853 : INFO : built Dictionary(45 unique tokens: ['shop', 'trace', 'serves', 'please', 'good']...) from 2 documents (total 52 corpus positions)\n", + "2018-09-11 22:27:45,867 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:45,868 : INFO : built Dictionary(51 unique tokens: ['shop', 'da', 'soon', 'small', 'please']...) from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:27:45,891 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:45,892 : INFO : built Dictionary(45 unique tokens: ['shop', 'samosas', 'trace', 'small', 'please']...) from 2 documents (total 53 corpus positions)\n", + "2018-09-11 22:27:45,906 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:45,907 : INFO : built Dictionary(50 unique tokens: ['agra', 'entering', 'okay', 'jaipur', 'shop']...) from 2 documents (total 59 corpus positions)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:27:45,927 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:45,928 : INFO : built Dictionary(49 unique tokens: ['shop', 'trace', 'small', 'please', 'india']...) from 2 documents (total 58 corpus positions)\n", + "2018-09-11 22:27:45,947 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:45,948 : INFO : built Dictionary(52 unique tokens: ['shop', 'kill', 'trace', 'small', 'police']...) from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:27:45,970 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:45,972 : INFO : built Dictionary(52 unique tokens: ['shop', 'drive', 'second', 'small', 'please']...) from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:27:45,994 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:46,372 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:46,373 : INFO : built Dictionary(39 unique tokens: ['wife', 'qatar', 'days', 'tell', 'civil']...) 
from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:27:46,390 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:46,391 : INFO : built Dictionary(44 unique tokens: ['ask', 'know', 'read', 'second', 'please']...) from 2 documents (total 73 corpus positions)\n", + "2018-09-11 22:27:46,411 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:46,412 : INFO : built Dictionary(31 unique tokens: ['body', 'provide', 'tell', 'second', 'qatar']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:27:46,423 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:46,424 : INFO : built Dictionary(30 unique tokens: ['wife', 'body', 'days', 'tell', 'second']...) from 2 documents (total 43 corpus positions)\n", + "2018-09-11 22:27:46,434 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:46,435 : INFO : built Dictionary(39 unique tokens: ['body', 'uae', 'tell', 'real', 'wait']...) from 2 documents (total 60 corpus positions)\n", + "2018-09-11 22:27:46,451 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:46,452 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:46,452 : INFO : built Dictionary(39 unique tokens: ['difference', 'said', 'qatar', 'pls', 'tell']...) from 2 documents (total 63 corpus positions)\n", + "2018-09-11 22:27:46,468 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:46,469 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:46,470 : INFO : built Dictionary(47 unique tokens: ['tell', 'rules', 'second', 'body', 'first']...) from 2 documents (total 66 corpus positions)\n", + "2018-09-11 22:27:46,491 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:46,492 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:46,493 : INFO : built Dictionary(50 unique tokens: ['knowledge', 'tell', 'second', 'day', 'birth']...) from 2 documents (total 68 corpus positions)\n", + "2018-09-11 22:27:46,516 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:46,517 : INFO : built Dictionary(44 unique tokens: ['need', 'totally', 'second', 'soon', 'qatar']...) from 2 documents (total 58 corpus positions)\n", + "2018-09-11 22:27:46,536 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:46,538 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:46,538 : INFO : built Dictionary(40 unique tokens: ['get', 'baldiya', 'tell', 'visit', 'even']...) from 2 documents (total 57 corpus positions)\n", + "2018-09-11 22:27:46,555 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:47,676 : INFO : Removed 4 and 4 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:47,678 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:47,678 : INFO : built Dictionary(36 unique tokens: ['flavoured', 'frying', 'qatar', 'know', 'following']...) 
from 2 documents (total 88 corpus positions)\n", + "2018-09-11 22:27:47,717 : INFO : Removed 0 and 4 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:47,718 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:47,719 : INFO : built Dictionary(45 unique tokens: ['frying', 'know', 'following', 'mayonnaise', 'oil']...) from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:27:47,734 : INFO : Removed 0 and 4 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:47,735 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:47,736 : INFO : built Dictionary(41 unique tokens: ['flavoured', 'frying', 'qatar', 'light', 'following']...) from 2 documents (total 54 corpus positions)\n", + "2018-09-11 22:27:47,745 : INFO : Removed 1 and 4 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:47,746 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:47,747 : INFO : built Dictionary(55 unique tokens: ['shape', 'light', 'following', 'full', 'frying']...) from 2 documents (total 66 corpus positions)\n", + "2018-09-11 22:27:47,771 : INFO : Removed 0 and 4 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:47,772 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:47,773 : INFO : built Dictionary(45 unique tokens: ['frying', 'know', 'following', 'mayonnaise', 'oil']...) from 2 documents (total 60 corpus positions)\n", + "2018-09-11 22:27:47,788 : INFO : Removed 2 and 4 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:47,789 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:47,790 : INFO : built Dictionary(54 unique tokens: ['frying', 'light', 'following', 'mayonnaise', 'oil']...) from 2 documents (total 67 corpus positions)\n", + "2018-09-11 22:27:47,816 : INFO : Removed 0 and 4 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:47,817 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:47,818 : INFO : built Dictionary(54 unique tokens: ['shop', 'light', 'unlike', 'following', 'frying']...) from 2 documents (total 65 corpus positions)\n", + "2018-09-11 22:27:47,842 : INFO : Removed 1 and 4 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:47,843 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:47,844 : INFO : built Dictionary(61 unique tokens: ['said', 'light', 'following', 'frying', 'dog']...) from 2 documents (total 76 corpus positions)\n", + "2018-09-11 22:27:47,877 : INFO : Removed 0 and 4 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:47,879 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:47,879 : INFO : built Dictionary(73 unique tokens: ['flavoured', 'light', 'yogurts', 'following', 'frying']...) from 2 documents (total 87 corpus positions)\n", + "2018-09-11 22:27:47,933 : INFO : Removed 0 and 4 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:47,933 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:47,934 : INFO : built Dictionary(41 unique tokens: ['flavoured', 'frying', 'qatar', 'light', 'ok']...) 
from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:27:47,943 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:48,229 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:48,230 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:48,231 : INFO : built Dictionary(35 unique tokens: ['organized', 'within', 'people', 'couple', 'lives']...) from 2 documents (total 45 corpus positions)\n", + "2018-09-11 22:27:48,243 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:48,244 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:48,245 : INFO : built Dictionary(19 unique tokens: ['player', 'people', 'basket', 'courts', 'afternoon']...) from 2 documents (total 26 corpus positions)\n", + "2018-09-11 22:27:48,250 : INFO : Removed 1 and 1 OOV words from document 1 and 2 (respectively).\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:27:48,250 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:48,251 : INFO : built Dictionary(24 unique tokens: ['group', 'c', 'thanks', 'lives', 'please']...) from 2 documents (total 30 corpus positions)\n", + "2018-09-11 22:27:48,258 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:48,258 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:48,259 : INFO : built Dictionary(18 unique tokens: ['player', 'people', 'basket', 'knows', 'playin']...) from 2 documents (total 24 corpus positions)\n", + "2018-09-11 22:27:48,263 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:48,264 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:48,265 : INFO : built Dictionary(35 unique tokens: ['group', 'back', 'could', 'sports', 'lives']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:27:48,277 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:48,278 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:48,279 : INFO : built Dictionary(48 unique tokens: ['ask', 'book', 'busy', 'ok', 'p']...) from 2 documents (total 58 corpus positions)\n", + "2018-09-11 22:27:48,297 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:48,298 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:48,299 : INFO : built Dictionary(48 unique tokens: ['wondered', 'golf', 'sports', 'watch', 'best']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:27:48,317 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:48,318 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:48,319 : INFO : built Dictionary(18 unique tokens: ['player', 'people', 'basket', 'lives', 'games']...) from 2 documents (total 25 corpus positions)\n", + "2018-09-11 22:27:48,322 : INFO : Removed 1 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:48,323 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:48,324 : INFO : built Dictionary(20 unique tokens: ['player', 'thankyou', 'gardens', 'compound', 'basket']...) 
from 2 documents (total 23 corpus positions)\n", + "2018-09-11 22:27:48,328 : INFO : Removed 1 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:48,329 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:48,330 : INFO : built Dictionary(28 unique tokens: ['problem', 'people', 'lives', 'contact', 'meachum']...) from 2 documents (total 33 corpus positions)\n", + "2018-09-11 22:27:48,338 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:49,630 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:49,631 : INFO : built Dictionary(15 unique tokens: ['group', 'study', 'max', 'personal', 'preferrably']...) from 2 documents (total 20 corpus positions)\n", + "2018-09-11 22:27:49,634 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:49,635 : INFO : built Dictionary(30 unique tokens: ['barely', 'qatar', 'know', 'none', 'study']...) from 2 documents (total 37 corpus positions)\n", + "2018-09-11 22:27:49,642 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:49,643 : INFO : built Dictionary(20 unique tokens: ['would', 'group', 'thanks', 'well', 'know']...) from 2 documents (total 26 corpus positions)\n", + "2018-09-11 22:27:49,647 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:49,648 : INFO : built Dictionary(20 unique tokens: ['living', 'place', 'teaching', 'among', 'speaking']...) from 2 documents (total 28 corpus positions)\n", + "2018-09-11 22:27:49,652 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:49,653 : INFO : built Dictionary(33 unique tokens: ['website', 'get', 'actually', 'yes', 'replied']...) from 2 documents (total 46 corpus positions)\n", + "2018-09-11 22:27:49,659 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:49,660 : INFO : built Dictionary(24 unique tokens: ['getting', 'get', 'fascination', 'almost', 'u']...) from 2 documents (total 43 corpus positions)\n", + "2018-09-11 22:27:49,665 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:49,666 : INFO : built Dictionary(26 unique tokens: ['work', 'western', 'care', 'know', 'arabic']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:27:49,671 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:49,672 : INFO : built Dictionary(28 unique tokens: ['round', 'people', 'speaking', 'english', 'coz']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:27:49,678 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:49,679 : INFO : built Dictionary(24 unique tokens: ['knowing', 'merely', 'people', 'scope', 'almost']...) from 2 documents (total 33 corpus positions)\n", + "2018-09-11 22:27:49,683 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:49,684 : INFO : built Dictionary(12 unique tokens: ['study', 'knows', 'know', 'language', 'arabic']...) from 2 documents (total 18 corpus positions)\n", + "2018-09-11 22:27:49,687 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:50,017 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:50,019 : INFO : built Dictionary(13 unique tokens: ['getting', 'fish', 'harbor', 'inform', 'fresh']...) 
from 2 documents (total 25 corpus positions)\n", + "2018-09-11 22:27:50,022 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:50,023 : INFO : built Dictionary(16 unique tokens: ['could', 'find', 'restaurant', 'inform', 'fresh']...) from 2 documents (total 25 corpus positions)\n", + "2018-09-11 22:27:50,027 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:50,028 : INFO : built Dictionary(30 unique tokens: ['wife', 'caviar', 'thanks', 'mer', 'inform']...) from 2 documents (total 37 corpus positions)\n", + "2018-09-11 22:27:50,035 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:50,036 : INFO : built Dictionary(31 unique tokens: ['ask', 'treat', 'talking', 'trip', 'enjoy']...) from 2 documents (total 37 corpus positions)\n", + "2018-09-11 22:27:50,045 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:50,046 : INFO : built Dictionary(25 unique tokens: ['frequent', 'recipe', 'thanks', 'restaurant', 'also']...) from 2 documents (total 35 corpus positions)\n", + "2018-09-11 22:27:50,052 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:50,053 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:50,054 : INFO : built Dictionary(23 unique tokens: ['restaurants', 'thanks', 'restaurant', 'inform', 'please']...) from 2 documents (total 35 corpus positions)\n", + "2018-09-11 22:27:50,060 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:50,061 : INFO : built Dictionary(30 unique tokens: ['welcomed', 'thanks', 'belt', 'inform', 'kids']...) from 2 documents (total 35 corpus positions)\n", + "2018-09-11 22:27:50,068 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:50,069 : INFO : built Dictionary(14 unique tokens: ['many', 'qatar', 'filipinos', 'find', 'inform']...) from 2 documents (total 18 corpus positions)\n", + "2018-09-11 22:27:50,072 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:50,073 : INFO : built Dictionary(35 unique tokens: ['fact', 'info', 'already', 'starting', 'else']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:27:50,082 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:50,083 : INFO : built Dictionary(12 unique tokens: ['least', 'inform', 'fresh', 'safe', 'please']...) from 2 documents (total 19 corpus positions)\n", + "2018-09-11 22:27:50,086 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:51,365 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:51,366 : INFO : built Dictionary(17 unique tokens: ['wife', 'doha', 'work', 'september', 'options']...) from 2 documents (total 38 corpus positions)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:27:51,376 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:51,377 : INFO : built Dictionary(40 unique tokens: ['wife', 'work', 'old', 'cost', 'resume']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:27:51,393 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:51,394 : INFO : built Dictionary(55 unique tokens: ['wife', 'drive', 'course', 'work', 'cleaning']...) 
from 2 documents (total 60 corpus positions)\n", + "2018-09-11 22:27:51,419 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:51,421 : INFO : built Dictionary(44 unique tokens: ['intend', 'group', 'work', 'contact', 'options']...) from 2 documents (total 52 corpus positions)\n", + "2018-09-11 22:27:51,439 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:51,440 : INFO : built Dictionary(30 unique tokens: ['wife', 'work', 'know', 'thanks', 'english']...) from 2 documents (total 37 corpus positions)\n", + "2018-09-11 22:27:51,449 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:51,450 : INFO : built Dictionary(31 unique tokens: ['wife', 'work', 'qatar', 'email', 'could']...) from 2 documents (total 37 corpus positions)\n", + "2018-09-11 22:27:51,460 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:51,461 : INFO : built Dictionary(38 unique tokens: ['info', 'wife', 'moving', 'would', 'thanks']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:27:51,474 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:51,475 : INFO : built Dictionary(36 unique tokens: ['wife', 'work', 'busy', 'thanks', 'comments']...) from 2 documents (total 46 corpus positions)\n", + "2018-09-11 22:27:51,487 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:51,488 : INFO : built Dictionary(31 unique tokens: ['wife', 'sending', 'teachers', 'work', 'nursery']...) from 2 documents (total 42 corpus positions)\n", + "2018-09-11 22:27:51,498 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:51,499 : INFO : built Dictionary(25 unique tokens: ['wife', 'work', 'teaching', 'nursery', 'options']...) from 2 documents (total 31 corpus positions)\n", + "2018-09-11 22:27:51,505 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:51,765 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:51,767 : INFO : built Dictionary(37 unique tokens: ['qatar', 'suggestions', 'located', 'better', 'nman']...) from 2 documents (total 52 corpus positions)\n", + "2018-09-11 22:27:51,782 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:51,783 : INFO : built Dictionary(29 unique tokens: ['qatar', 'charges', 'located', 'better', 'gives']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:27:51,793 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:51,794 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:51,795 : INFO : built Dictionary(49 unique tokens: ['unable', 'nri', 'better', 'indians', 'icici']...) from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:27:51,816 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:27:51,817 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:51,817 : INFO : built Dictionary(49 unique tokens: ['euros', 'better', 'telling', 'qatar', 'india']...) from 2 documents (total 58 corpus positions)\n", + "2018-09-11 22:27:51,837 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:51,838 : INFO : built Dictionary(32 unique tokens: ['qatar', 'hello', 'better', 'gives', 'need']...) 
from 2 documents (total 37 corpus positions)\n", + "2018-09-11 22:27:51,848 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:51,849 : INFO : built Dictionary(25 unique tokens: ['work', 'qatar', 'located', 'better', 'afternoon']...) from 2 documents (total 28 corpus positions)\n", + "2018-09-11 22:27:51,855 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:51,856 : INFO : built Dictionary(34 unique tokens: ['get', 'unlike', 'better', 'gives', 'evening']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:27:51,867 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:51,868 : INFO : built Dictionary(50 unique tokens: ['work', 'better', 'remit', 'qatari', 'qar']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:27:51,888 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:51,889 : INFO : built Dictionary(50 unique tokens: ['bundle', 'ok', 'forgets', 'care', 'card']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:27:51,909 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:51,910 : INFO : built Dictionary(34 unique tokens: ['qatar', 'located', 'better', 'wage', 'per']...) from 2 documents (total 38 corpus positions)\n", + "2018-09-11 22:27:51,921 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:27:53,188 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:53,189 : INFO : built Dictionary(40 unique tokens: ['get', 'website', 'bayt', 'passport', 'thanks']...) from 2 documents (total 47 corpus positions)\n", + "2018-09-11 22:27:53,203 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:53,204 : INFO : built Dictionary(35 unique tokens: ['expenses', 'qatar', 'estimate', 'whatsoever', 'ideas']...) from 2 documents (total 46 corpus positions)\n", + "2018-09-11 22:27:53,215 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:53,216 : INFO : built Dictionary(33 unique tokens: ['qatar', 'cash', 'estimate', 'one', 'options']...) from 2 documents (total 39 corpus positions)\n", + "2018-09-11 22:27:53,226 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:53,227 : INFO : built Dictionary(54 unique tokens: ['info', 'learing', 'admin', 'ill', 'value']...) from 2 documents (total 60 corpus positions)\n", + "2018-09-11 22:27:53,247 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:53,248 : INFO : built Dictionary(33 unique tokens: ['interested', 'estimate', 'hello', 'sapper', 'amount']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:27:53,257 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:53,258 : INFO : built Dictionary(55 unique tokens: ['would', 'better', 'estimate', 'memory', 'amount']...) from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:27:53,278 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:27:53,279 : INFO : built Dictionary(32 unique tokens: ['work', 'qatar', 'group', 'real', 'estimate']...) 
from 2 documents (total 33 corpus positions)\n",
    "[repetitive gensim INFO logging from the benchmark run elided: per-document-pair Dictionary construction, OOV-word removal, and L2-norm precomputation]\n",
    "2018-09-11 22:28:15,250 : INFO : built Dictionary(29 unique tokens: ['qatar', 'need', 'deliver', 'coz', 'completed']...) 
from 2 documents (total 29 corpus positions)\n", + "2018-09-11 22:28:15,259 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:28:16,932 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:16,933 : INFO : built Dictionary(21 unique tokens: ['facebook', 'getting', 'living', 'version', 'annoyed']...) from 2 documents (total 26 corpus positions)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:28:16,939 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:16,939 : INFO : built Dictionary(25 unique tokens: ['living', 'version', 'need', 'thanks', 'garmin']...) from 2 documents (total 31 corpus positions)\n", + "2018-09-11 22:28:16,945 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:16,946 : INFO : built Dictionary(35 unique tokens: ['version', 'work', 'get', 'would', 'helps']...) from 2 documents (total 47 corpus positions)\n", + "2018-09-11 22:28:16,954 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:16,955 : INFO : built Dictionary(30 unique tokens: ['wanna', 'qatar', 'version', 'purchased', 'cmon']...) from 2 documents (total 37 corpus positions)\n", + "2018-09-11 22:28:16,962 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:16,963 : INFO : built Dictionary(18 unique tokens: ['version', 'prefer', 'living', 'afraid', 'hello']...) from 2 documents (total 19 corpus positions)\n", + "2018-09-11 22:28:16,967 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:16,968 : INFO : built Dictionary(39 unique tokens: ['version', 'tvs', 'qatar', 'also', 'tell']...) from 2 documents (total 47 corpus positions)\n", + "2018-09-11 22:28:16,977 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:16,978 : INFO : built Dictionary(24 unique tokens: ['register', 'living', 'group', 'study', 'thanks']...) from 2 documents (total 32 corpus positions)\n", + "2018-09-11 22:28:16,983 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:16,984 : INFO : built Dictionary(29 unique tokens: ['block', 'version', 'qtel', 'also', 'dont']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:28:16,991 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:16,992 : INFO : built Dictionary(35 unique tokens: ['version', 'qatar', 'living', 'win', 'except']...) from 2 documents (total 42 corpus positions)\n", + "2018-09-11 22:28:17,000 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:17,001 : INFO : built Dictionary(43 unique tokens: ['negatives', 'android', 'suggest', 'need', 'retina']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:28:17,011 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:28:18,703 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:18,705 : INFO : built Dictionary(40 unique tokens: ['wife', 'give', 'qatar', 'pls', 'tell']...) from 2 documents (total 54 corpus positions)\n", + "2018-09-11 22:28:18,718 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:18,719 : INFO : built Dictionary(30 unique tokens: ['wife', 'get', 'recruiting', 'gone', 'month']...) 
from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:28:18,727 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:18,728 : INFO : built Dictionary(35 unique tokens: ['wife', 'counter', 'submission', 'rp', 'message']...) from 2 documents (total 47 corpus positions)\n", + "2018-09-11 22:28:18,739 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:18,740 : INFO : built Dictionary(38 unique tokens: ['wife', 'reply', 'approve', 'review', 'showing']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:28:18,750 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:18,752 : INFO : built Dictionary(28 unique tokens: ['ask', 'wife', 'qatar', 'civil', 'profession']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:28:18,760 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:18,761 : INFO : built Dictionary(31 unique tokens: ['wife', 'qatar', 'recruiting', 'civil', 'even']...) from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:28:18,770 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:18,771 : INFO : built Dictionary(21 unique tokens: ['wife', 'got', 'application', 'procedures', 'schedule']...) from 2 documents (total 31 corpus positions)\n", + "2018-09-11 22:28:18,776 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:18,777 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:18,778 : INFO : built Dictionary(32 unique tokens: ['difference', 'said', 'qatar', 'pls', 'wife']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:28:18,788 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:18,788 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:18,789 : INFO : built Dictionary(46 unique tokens: ['wife', 'recruiting', 'civil', 'rp', 'please']...) from 2 documents (total 67 corpus positions)\n", + "2018-09-11 22:28:18,804 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:18,805 : INFO : built Dictionary(21 unique tokens: ['wife', 'qatar', 'appreciate', 'visa', 'procedures']...) from 2 documents (total 31 corpus positions)\n", + "2018-09-11 22:28:18,811 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:28:20,491 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:20,492 : INFO : built Dictionary(32 unique tokens: ['qatar', 'body', 'administration', 'changing', 'manager']...) from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:28:20,502 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:20,503 : INFO : built Dictionary(26 unique tokens: ['ask', 'wife', 'qatar', 'civil', 'profession']...) from 2 documents (total 56 corpus positions)\n", + "2018-09-11 22:28:20,510 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:20,511 : INFO : built Dictionary(20 unique tokens: ['friend', 'driving', 'licence', 'goverment', 'said']...) from 2 documents (total 30 corpus positions)\n", + "2018-09-11 22:28:20,516 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:20,517 : INFO : built Dictionary(41 unique tokens: ['wife', 'supervisor', 'get', 'allow', 'answers']...) 
from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:28:20,530 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:20,531 : INFO : built Dictionary(24 unique tokens: ['qualify', 'qatar', 'stated', 'arrival', 'yes']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:28:20,537 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:20,538 : INFO : built Dictionary(39 unique tokens: ['anxious', 'qatar', 'pls', 'nurse', 'per']...) from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:28:20,549 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:20,550 : INFO : built Dictionary(41 unique tokens: ['living', 'awaiting', 'thanks', 'residency', 'salaries']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:28:20,561 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:20,562 : INFO : built Dictionary(32 unique tokens: ['moving', 'comments', 'commute', 'qatar', 'called']...) from 2 documents (total 42 corpus positions)\n", + "2018-09-11 22:28:20,571 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:20,572 : INFO : built Dictionary(31 unique tokens: ['especially', 'get', 'literature', 'need', 'thanks']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:28:20,580 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:20,581 : INFO : built Dictionary(20 unique tokens: ['qatar', 'visa', 'qatarized', 'qatarization', 'permanent']...) from 2 documents (total 26 corpus positions)\n", + "2018-09-11 22:28:20,586 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:28:22,241 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:22,242 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:22,243 : INFO : built Dictionary(36 unique tokens: ['loans', 'get', 'open', 'necessity', 'choices']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:28:22,253 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:28:22,254 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:22,255 : INFO : built Dictionary(42 unique tokens: ['resign', 'give', 'cover', 'number', 'cash']...) from 2 documents (total 62 corpus positions)\n", + "2018-09-11 22:28:22,268 : INFO : Removed 1 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:22,269 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:22,270 : INFO : built Dictionary(26 unique tokens: ['qatar', 'open', 'edge', 'thanks', 'roughly']...) from 2 documents (total 33 corpus positions)\n", + "2018-09-11 22:28:22,277 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:22,278 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:22,279 : INFO : built Dictionary(45 unique tokens: ['work', 'open', 'c', 'better', 'get']...) 
from 2 documents (total 63 corpus positions)\n", + "2018-09-11 22:28:22,293 : INFO : Removed 1 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:22,294 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:22,295 : INFO : built Dictionary(44 unique tokens: ['unable', 'nri', 'better', 'indians', 'icici']...) from 2 documents (total 58 corpus positions)\n", + "2018-09-11 22:28:22,309 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:22,310 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:22,310 : INFO : built Dictionary(21 unique tokens: ['bank', 'kindly', 'open', 'thanks', 'get']...) from 2 documents (total 29 corpus positions)\n", + "2018-09-11 22:28:22,316 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:22,317 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:22,318 : INFO : built Dictionary(32 unique tokens: ['work', 'problem', 'pay', 'qatar', 'mean']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:28:22,327 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:22,328 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:22,328 : INFO : built Dictionary(28 unique tokens: ['get', 'charges', 'warm', 'say', 'hav']...) from 2 documents (total 37 corpus positions)\n", + "2018-09-11 22:28:22,336 : INFO : Removed 0 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:22,336 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:22,337 : INFO : built Dictionary(42 unique tokens: ['info', 'poll', 'living', 'people', 'weeks']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:28:22,349 : INFO : Removed 1 and 1 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:22,350 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:22,351 : INFO : built Dictionary(34 unique tokens: ['side', 'country', 'break', 'qnb', 'cards']...) from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:28:22,361 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:28:24,026 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:24,028 : INFO : built Dictionary(19 unique tokens: ['couldnot', 'way', 'went', 'vacation', 'renew']...) from 2 documents (total 24 corpus positions)\n", + "2018-09-11 22:28:24,032 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:24,033 : INFO : built Dictionary(41 unique tokens: ['serial', 'qatar', 'explain', 'even', 'qatari']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:28:24,043 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:24,044 : INFO : built Dictionary(32 unique tokens: ['wife', 'qatar', 'days', 'tell', 'civil']...) from 2 documents (total 46 corpus positions)\n", + "2018-09-11 22:28:24,051 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:24,052 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:24,052 : INFO : built Dictionary(39 unique tokens: ['someone', 'get', 'days', 'tell', 'thanks']...) 
from 2 documents (total 50 corpus positions)\n", + "2018-09-11 22:28:24,062 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:24,063 : INFO : built Dictionary(36 unique tokens: ['rp', 'payroll', 'thanks', 'completed', 'present']...) from 2 documents (total 46 corpus positions)\n", + "2018-09-11 22:28:24,071 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:24,072 : INFO : built Dictionary(31 unique tokens: ['body', 'days', 'administration', 'changing', 'manager']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:28:24,079 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:24,080 : INFO : built Dictionary(17 unique tokens: ['bank', 'doha', 'indian', 'days', 'dont']...) from 2 documents (total 21 corpus positions)\n", + "2018-09-11 22:28:24,083 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:24,084 : INFO : built Dictionary(34 unique tokens: ['plz', 'qatar', 'actually', 'tell', 'afternoon']...) from 2 documents (total 52 corpus positions)\n", + "2018-09-11 22:28:24,092 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:24,093 : INFO : built Dictionary(30 unique tokens: ['taken', 'days', 'passport', 'weeks', 'hello']...) from 2 documents (total 35 corpus positions)\n", + "2018-09-11 22:28:24,100 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:24,101 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:24,102 : INFO : built Dictionary(32 unique tokens: ['qatar', 'steps', 'follow', 'thank', 'also']...) from 2 documents (total 39 corpus positions)\n", + "2018-09-11 22:28:24,109 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:28:25,800 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:25,802 : INFO : built Dictionary(38 unique tokens: ['jan', 'know', 'passport', 'form', 'would']...) from 2 documents (total 53 corpus positions)\n", + "2018-09-11 22:28:25,817 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:25,818 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:25,819 : INFO : built Dictionary(44 unique tokens: ['said', 'renewal', 'better', 'n', 'told']...) from 2 documents (total 64 corpus positions)\n", + "2018-09-11 22:28:25,839 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:25,840 : INFO : built Dictionary(25 unique tokens: ['qatar', 'hotel', 'n', 'month', 'one']...) from 2 documents (total 35 corpus positions)\n", + "2018-09-11 22:28:25,847 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:25,848 : INFO : built Dictionary(39 unique tokens: ['wife', 'days', 'n', 'visit', 'month']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:28:25,865 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:25,866 : INFO : built Dictionary(39 unique tokens: ['ask', 'expired', 'n', 'visit', 'time']...) from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:28:25,883 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:25,884 : INFO : built Dictionary(36 unique tokens: ['days', 'like', 'maximum', 'n', 'visit']...) 
from 2 documents (total 58 corpus positions)\n", + "2018-09-11 22:28:25,901 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:25,902 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:25,902 : INFO : built Dictionary(47 unique tokens: ['would', 'n', 'rules', 'appreciated', 'months']...) from 2 documents (total 67 corpus positions)\n", + "2018-09-11 22:28:25,925 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:25,926 : INFO : built Dictionary(27 unique tokens: ['would', 'know', 'n', 'month', 'please']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:28:25,935 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:25,936 : INFO : built Dictionary(36 unique tokens: ['entry', 'qa', 'n', 'also', 'qatar']...) from 2 documents (total 50 corpus positions)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:28:25,950 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:25,951 : INFO : built Dictionary(31 unique tokens: ['know', 'thanks', 'arrival', 'would', 'like']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:28:25,962 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:28:27,649 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:27,650 : INFO : built Dictionary(30 unique tokens: ['entry', 'qa', 'need', 'month', 'also']...) from 2 documents (total 46 corpus positions)\n", + "2018-09-11 22:28:27,660 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:27,661 : INFO : built Dictionary(32 unique tokens: ['get', 'right', 'long', 'july', 'travel']...) from 2 documents (total 47 corpus positions)\n", + "2018-09-11 22:28:27,671 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:27,672 : INFO : built Dictionary(24 unique tokens: ['get', 'thanks', 'arrival', 'month', 'spend']...) from 2 documents (total 39 corpus positions)\n", + "2018-09-11 22:28:27,680 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:27,681 : INFO : built Dictionary(37 unique tokens: ['work', 'problem', 'visit', 'pay', 'month']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:28:27,692 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:27,693 : INFO : built Dictionary(34 unique tokens: ['visited', 'get', 'pls', 'month', 'thanks']...) from 2 documents (total 52 corpus positions)\n", + "2018-09-11 22:28:27,705 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:27,706 : INFO : built Dictionary(43 unique tokens: ['living', 'indian', 'month', 'awaiting', 'thanks']...) from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:28:27,721 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:27,722 : INFO : built Dictionary(25 unique tokens: ['qatar', 'pls', 'link', 'month', 'thanks']...) from 2 documents (total 33 corpus positions)\n", + "2018-09-11 22:28:27,729 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:27,730 : INFO : built Dictionary(47 unique tokens: ['bundle', 'ok', 'forgets', 'care', 'card']...) 
from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:28:27,747 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:27,749 : INFO : built Dictionary(32 unique tokens: ['permits', 'permit', 'month', 'means', 'â']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:28:27,759 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:27,760 : INFO : built Dictionary(28 unique tokens: ['qatar', 'need', 'thanks', 'visit', 'month']...) from 2 documents (total 37 corpus positions)\n", + "2018-09-11 22:28:27,768 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:28:29,456 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:29,458 : INFO : built Dictionary(27 unique tokens: ['reply', 'would', 'thanks', 'travel', 'qatar']...) from 2 documents (total 39 corpus positions)\n", + "2018-09-11 22:28:29,467 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:29,468 : INFO : built Dictionary(35 unique tokens: ['get', 'air', 'thanks', 'doha', 'would']...) from 2 documents (total 42 corpus positions)\n", + "2018-09-11 22:28:29,479 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:29,480 : INFO : built Dictionary(33 unique tokens: ['get', 'would', 'thanks', 'right', 'long']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:28:29,492 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:29,493 : INFO : built Dictionary(25 unique tokens: ['get', 'know', 'thanks', 'even', 'travel']...) from 2 documents (total 40 corpus positions)\n", + "2018-09-11 22:28:29,502 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:29,503 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:29,503 : INFO : built Dictionary(49 unique tokens: ['cancelled', 'release', 'manager', 'rp', 'maybe']...) from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:28:29,524 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:29,525 : INFO : built Dictionary(34 unique tokens: ['get', 'travel', 'almost', 'shocked', 'say']...) from 2 documents (total 56 corpus positions)\n", + "2018-09-11 22:28:29,537 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:29,538 : INFO : built Dictionary(37 unique tokens: ['get', 'travel', 'thanks', 'could', 'currently']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:28:29,552 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:29,553 : INFO : built Dictionary(33 unique tokens: ['info', 'get', 'would', 'read', 'thanks']...) from 2 documents (total 45 corpus positions)\n", + "2018-09-11 22:28:29,564 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:29,565 : INFO : built Dictionary(27 unique tokens: ['work', 'get', 'travel', 'thanks', 'thankful']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:28:29,575 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:29,576 : INFO : built Dictionary(41 unique tokens: ['would', 'affidavit', 'qatar', 'approve', 'tell']...) 
from 2 documents (total 62 corpus positions)\n", + "2018-09-11 22:28:29,591 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:28:31,281 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:31,283 : INFO : built Dictionary(27 unique tokens: ['book', 'qatar', 'pls', 'visit', 'hello']...) from 2 documents (total 34 corpus positions)\n", + "2018-09-11 22:28:31,290 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:31,291 : INFO : built Dictionary(45 unique tokens: ['pls', 'exam', 'please', 'advise', 'apply']...) from 2 documents (total 63 corpus positions)\n", + "2018-09-11 22:28:31,312 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:31,313 : INFO : built Dictionary(47 unique tokens: ['permit', 'pls', 'c', 'visit', 'exam']...) from 2 documents (total 65 corpus positions)\n", + "2018-09-11 22:28:31,332 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:31,333 : INFO : built Dictionary(32 unique tokens: ['qatar', 'pls', 'hello', 'back', 'month']...) from 2 documents (total 43 corpus positions)\n", + "2018-09-11 22:28:31,344 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:31,345 : INFO : built Dictionary(43 unique tokens: ['info', 'qatar', 'days', 'hello', 'back']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:28:31,363 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:31,364 : INFO : built Dictionary(43 unique tokens: ['take', 'qatar', 'body', 'pls', 'visit']...) from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:28:31,382 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:31,383 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:31,384 : INFO : built Dictionary(39 unique tokens: ['difference', 'said', 'qatar', 'pls', 'hello']...) from 2 documents (total 66 corpus positions)\n", + "2018-09-11 22:28:31,401 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:31,402 : INFO : built Dictionary(48 unique tokens: ['pls', 'need', 'still', 'exam', 'rp']...) from 2 documents (total 69 corpus positions)\n", + "2018-09-11 22:28:31,425 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:31,426 : INFO : built Dictionary(42 unique tokens: ['wife', 'qatar', 'pls', 'hello', 'visit']...) from 2 documents (total 57 corpus positions)\n", + "2018-09-11 22:28:31,444 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:31,445 : INFO : built Dictionary(31 unique tokens: ['getting', 'qatar', 'pls', 'ok', 'work']...) from 2 documents (total 42 corpus positions)\n", + "2018-09-11 22:28:31,454 : INFO : precomputing L2-norms of word weight vectors\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:28:33,135 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:33,136 : INFO : built Dictionary(37 unique tokens: ['wife', 'qatar', 'follow', 'tell', 'civil']...) from 2 documents (total 60 corpus positions)\n", + "2018-09-11 22:28:33,153 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:33,154 : INFO : built Dictionary(41 unique tokens: ['ask', 'wife', 'qatar', 'would', 'follow']...) 
from 2 documents (total 72 corpus positions)\n", + "2018-09-11 22:28:33,173 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:33,175 : INFO : built Dictionary(31 unique tokens: ['wife', 'qatar', 'follow', 'hello', 'son']...) from 2 documents (total 47 corpus positions)\n", + "2018-09-11 22:28:33,186 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:33,187 : INFO : built Dictionary(29 unique tokens: ['wife', 'qatar', 'follow', 'hello', 'son']...) from 2 documents (total 42 corpus positions)\n", + "2018-09-11 22:28:33,197 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:33,198 : INFO : built Dictionary(39 unique tokens: ['wife', 'qatar', 'uae', 'follow', 'tell']...) from 2 documents (total 59 corpus positions)\n", + "2018-09-11 22:28:33,214 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:33,215 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:33,216 : INFO : built Dictionary(39 unique tokens: ['difference', 'said', 'qatar', 'pls', 'follow']...) from 2 documents (total 62 corpus positions)\n", + "2018-09-11 22:28:33,232 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:33,233 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:33,234 : INFO : built Dictionary(45 unique tokens: ['rules', 'son', 'please', 'appreciated', 'months']...) from 2 documents (total 65 corpus positions)\n", + "2018-09-11 22:28:33,256 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:33,257 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:33,258 : INFO : built Dictionary(46 unique tokens: ['tell', 'son', 'day', 'birth', 'required']...) from 2 documents (total 67 corpus positions)\n", + "2018-09-11 22:28:33,280 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:33,282 : INFO : built Dictionary(43 unique tokens: ['wife', 'qatar', 'follow', 'soon', 'totally']...) from 2 documents (total 57 corpus positions)\n", + "2018-09-11 22:28:33,301 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:33,302 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:33,303 : INFO : built Dictionary(39 unique tokens: ['wife', 'get', 'baldiya', 'follow', 'visit']...) from 2 documents (total 56 corpus positions)\n", + "2018-09-11 22:28:33,319 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:28:35,001 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:35,002 : INFO : built Dictionary(50 unique tokens: ['know', 'second', 'qa', 'qar', 'please']...) from 2 documents (total 62 corpus positions)\n", + "2018-09-11 22:28:35,026 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:35,028 : INFO : built Dictionary(38 unique tokens: ['reasonable', 'getting', 'qatar', 'qa', 'good']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:28:35,042 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:35,043 : INFO : built Dictionary(33 unique tokens: ['getting', 'qatar', 'qa', 'accommodation', 'terms']...) 
from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:28:35,056 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:35,057 : INFO : built Dictionary(55 unique tokens: ['know', 'following', 'garage', 'second', 'qa']...) from 2 documents (total 84 corpus positions)\n", + "2018-09-11 22:28:35,084 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:35,085 : INFO : built Dictionary(44 unique tokens: ['would', 'second', 'qa', 'qar', 'please']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:28:35,104 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:35,105 : INFO : built Dictionary(44 unique tokens: ['full', 'benefits', 'qa', 'qar', 'accommodation']...) from 2 documents (total 53 corpus positions)\n", + "2018-09-11 22:28:35,124 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:35,125 : INFO : built Dictionary(48 unique tokens: ['c', 'benefits', 'forward', 'qa', 'maybe']...) from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:28:35,148 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:35,149 : INFO : built Dictionary(41 unique tokens: ['wife', 'ant', 'qa', 'thanks', 'getting']...) from 2 documents (total 53 corpus positions)\n", + "2018-09-11 22:28:35,166 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:35,168 : INFO : built Dictionary(53 unique tokens: ['ask', 'second', 'evening', 'went', 'qar']...) from 2 documents (total 65 corpus positions)\n", + "2018-09-11 22:28:35,193 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:35,194 : INFO : built Dictionary(57 unique tokens: ['pls', 'second', 'agent', 'qa', 'qar']...) from 2 documents (total 73 corpus positions)\n", + "2018-09-11 22:28:35,223 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:28:36,914 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:36,915 : INFO : built Dictionary(32 unique tokens: ['cancelled', 'qatar', 'wait', 'cancel', 'month']...) from 2 documents (total 42 corpus positions)\n", + "2018-09-11 22:28:36,926 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:36,927 : INFO : built Dictionary(20 unique tokens: ['qatar', 'sponsorship', 'passport', 'procedure', 'even']...) from 2 documents (total 28 corpus positions)\n", + "2018-09-11 22:28:36,932 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:36,933 : INFO : built Dictionary(51 unique tokens: ['cancelled', 'permit', 'need', 'slow', 'work']...) from 2 documents (total 66 corpus positions)\n", + "2018-09-11 22:28:36,953 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:36,954 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:36,955 : INFO : built Dictionary(48 unique tokens: ['cancelled', 'finally', 'release', 'manager', 'rp']...) from 2 documents (total 55 corpus positions)\n", + "2018-09-11 22:28:36,973 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:36,975 : INFO : built Dictionary(34 unique tokens: ['permits', 'permit', 'qatar', 'rp', 'means']...) 
from 2 documents (total 45 corpus positions)\n", + "2018-09-11 22:28:36,986 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:36,987 : INFO : built Dictionary(22 unique tokens: ['ask', 'cancelled', 'qatar', 'rules', 'cancel']...) from 2 documents (total 29 corpus positions)\n", + "2018-09-11 22:28:36,993 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:36,994 : INFO : built Dictionary(44 unique tokens: ['pls', 'need', 'rp', 'finally', 'please']...) from 2 documents (total 60 corpus positions)\n", + "2018-09-11 22:28:37,011 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:37,012 : INFO : built Dictionary(25 unique tokens: ['reply', 'sponsorship', 'cancel', 'rp', 'finally']...) from 2 documents (total 41 corpus positions)\n", + "2018-09-11 22:28:37,019 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:37,020 : INFO : built Dictionary(37 unique tokens: ['work', 'problem', 'sponsorship', 'pay', 'cancel']...) from 2 documents (total 45 corpus positions)\n", + "2018-09-11 22:28:37,032 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:37,033 : INFO : built Dictionary(37 unique tokens: ['wife', 'work', 'qatar', 'weeks', 'thanks']...) from 2 documents (total 44 corpus positions)\n", + "2018-09-11 22:28:37,046 : INFO : precomputing L2-norms of word weight vectors\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-09-11 22:28:38,734 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:38,736 : INFO : built Dictionary(43 unique tokens: ['info', 'poll', 'living', 'people', 'weeks']...) from 2 documents (total 61 corpus positions)\n", + "2018-09-11 22:28:38,752 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:38,753 : INFO : built Dictionary(25 unique tokens: ['open', 'customer', 'thanks', 'soon', 'please']...) from 2 documents (total 39 corpus positions)\n", + "2018-09-11 22:28:38,760 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:38,761 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:38,762 : INFO : built Dictionary(29 unique tokens: ['open', 'customer', 'thanks', 'roughly', 'stll']...) from 2 documents (total 43 corpus positions)\n", + "2018-09-11 22:28:38,770 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:38,771 : INFO : built Dictionary(39 unique tokens: ['loans', 'open', 'necessity', 'customer', 'choices']...) from 2 documents (total 54 corpus positions)\n", + "2018-09-11 22:28:38,785 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:38,786 : INFO : built Dictionary(31 unique tokens: ['qatar', 'opening', 'customer', 'thanks', 'cbq']...) from 2 documents (total 54 corpus positions)\n", + "2018-09-11 22:28:38,797 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:38,798 : INFO : built Dictionary(32 unique tokens: ['open', 'qnb', 'better', 'thanks', 'long']...) from 2 documents (total 49 corpus positions)\n", + "2018-09-11 22:28:38,807 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:38,809 : INFO : built Dictionary(18 unique tokens: ['bank', 'qatar', 'great', 'anybody', 'personal']...) 
from 2 documents (total 31 corpus positions)\n", + "2018-09-11 22:28:38,813 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:38,814 : INFO : built Dictionary(34 unique tokens: ['work', 'problem', 'customer', 'pay', 'qatar']...) from 2 documents (total 51 corpus positions)\n", + "2018-09-11 22:28:38,825 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:38,826 : INFO : built Dictionary(40 unique tokens: ['best', 'get', 'deposit', 'need', 'thanks']...) from 2 documents (total 62 corpus positions)\n", + "2018-09-11 22:28:38,841 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:38,842 : INFO : built Dictionary(35 unique tokens: ['track', 'customer', 'thanks', 'free', 'funds']...) from 2 documents (total 52 corpus positions)\n", + "2018-09-11 22:28:38,854 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:28:40,537 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:40,538 : INFO : built Dictionary(43 unique tokens: ['vf', 'invited', 'p', 'yes', 'enjoy']...) from 2 documents (total 56 corpus positions)\n", + "2018-09-11 22:28:40,549 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:40,550 : INFO : built Dictionary(13 unique tokens: ['reply', 'fire', 'corniche', 'enjoy', 'qatar']...) from 2 documents (total 22 corpus positions)\n", + "2018-09-11 22:28:40,553 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:40,554 : INFO : built Dictionary(11 unique tokens: ['hear', 'fire', 'corniche', 'time', 'distance']...) from 2 documents (total 17 corpus positions)\n", + "2018-09-11 22:28:40,556 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:40,557 : INFO : built Dictionary(13 unique tokens: ['get', 'fire', 'corniche', 'time', 'awesome']...) from 2 documents (total 16 corpus positions)\n", + "2018-09-11 22:28:40,559 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:40,560 : INFO : built Dictionary(7 unique tokens: ['dec', 'fire', 'corniche', 'time', 'scheduled']...) from 2 documents (total 11 corpus positions)\n", + "2018-09-11 22:28:40,561 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:40,562 : INFO : built Dictionary(34 unique tokens: ['dec', 'problem', 'fire', 'military', 'knowing']...) from 2 documents (total 48 corpus positions)\n", + "2018-09-11 22:28:40,569 : INFO : Removed 1 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:40,570 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:40,571 : INFO : built Dictionary(21 unique tokens: ['body', 'fire', 'whey', 'stopping', 'time']...) from 2 documents (total 27 corpus positions)\n", + "2018-09-11 22:28:40,575 : INFO : Removed 2 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:40,576 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:40,576 : INFO : built Dictionary(13 unique tokens: ['eid', 'fire', 'c', 'corniche', 'e']...) from 2 documents (total 16 corpus positions)\n", + "2018-09-11 22:28:40,579 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:40,580 : INFO : built Dictionary(22 unique tokens: ['people', 'enjoy', 'day', 'different', 'fire']...) 
from 2 documents (total 27 corpus positions)\n", + "2018-09-11 22:28:40,584 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:40,585 : INFO : built Dictionary(20 unique tokens: ['police', 'got', 'night', 'closed', 'corniche']...) from 2 documents (total 25 corpus positions)\n", + "2018-09-11 22:28:40,589 : INFO : precomputing L2-norms of word weight vectors\n", + "2018-09-11 22:28:42,243 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:42,244 : INFO : built Dictionary(35 unique tokens: ['signs', 'problem', 'people', 'back', 'deleted']...) from 2 documents (total 42 corpus positions)\n", + "2018-09-11 22:28:42,256 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:42,257 : INFO : built Dictionary(27 unique tokens: ['qatar', 'scares', 'people', 'written', 'right']...) from 2 documents (total 35 corpus positions)\n", + "2018-09-11 22:28:42,265 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:42,266 : INFO : built Dictionary(18 unique tokens: ['name', 'rp', 'written', 'inform', 'qatar']...) from 2 documents (total 22 corpus positions)\n", + "2018-09-11 22:28:42,269 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:42,270 : INFO : built Dictionary(32 unique tokens: ['friend', 'problem', 'people', 'back', 'thanks']...) from 2 documents (total 36 corpus positions)\n", + "2018-09-11 22:28:42,280 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:42,281 : INFO : built Dictionary(31 unique tokens: ['living', 'inform', 'shamelessly', 'breaking', 'topic']...) from 2 documents (total 34 corpus positions)\n", + "2018-09-11 22:28:42,290 : INFO : Removed 3 and 0 OOV words from document 1 and 2 (respectively).\n", + "2018-09-11 22:28:42,291 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:42,292 : INFO : built Dictionary(29 unique tokens: ['atmosphere', 'rp', 'gulf', 'thanks', 'could']...) from 2 documents (total 35 corpus positions)\n", + "2018-09-11 22:28:42,300 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:42,301 : INFO : built Dictionary(25 unique tokens: ['calvin', 'qatar', 'rp', 'colognes', 'inform']...) from 2 documents (total 28 corpus positions)\n", + "2018-09-11 22:28:42,308 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:42,309 : INFO : built Dictionary(17 unique tokens: ['got', 'instead', 'rp', 'seen', 'inform']...) from 2 documents (total 20 corpus positions)\n", + "2018-09-11 22:28:42,311 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:42,312 : INFO : built Dictionary(27 unique tokens: ['problem', 'shopping', 'tell', 'inform', 'got']...) from 2 documents (total 30 corpus positions)\n", + "2018-09-11 22:28:42,319 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n", + "2018-09-11 22:28:42,320 : INFO : built Dictionary(34 unique tokens: ['nokia', 'camera', 'know', 'written', 'inform']...) 
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "CPU times: user 2.02 s, sys: 5.2 s, total: 7.22 s\n",
-      "Wall time: 2min 38s\n"
+      "CPU times: user 2.14 s, sys: 5.08 s, total: 7.22 s\n",
+      "Wall time: 2min 51s\n"
     ]
    }
   ],
@@ -568,22 +7387,22 @@
     "\n",
     "Dataset | Strategy | MAP score | Elapsed time (sec)\n",
     ":---|:---|:---|---:\n",
-    "2016-test|softcossim|78.57 ±10.86|3.91 ±0.51\n",
+    "2016-test|softcossim|77.15 ±10.83|4.48 ±0.56\n",
     "2016-test|**Winner (UH-PRHLT-primary)**|76.70 ±0.00|\n",
-    "2016-test|cossim|76.45 ±10.40|0.22 ±0.04\n",
-    "2016-test|wmd-gensim|76.04 ±11.54|12.45 ±1.29\n",
+    "2016-test|cossim|76.45 ±10.40|0.25 ±0.04\n",
+    "2016-test|wmd-gensim|76.15 ±11.51|13.79 ±1.39\n",
     "2016-test|**Baseline 1 (IR)**|74.75 ±0.00|\n",
-    "2016-test|wmd-relax|73.68 ±12.69|0.33 ±0.08\n",
+    "2016-test|wmd-relax|72.03 ±11.33|0.34 ±0.07\n",
     "2016-test|**Baseline 2 (random)**|46.98 ±0.00|\n",
     "\n",
     "\n",
     "Dataset | Strategy | MAP score | Elapsed time (sec)\n",
     ":---|:---|:---|---:\n",
     "2017-test|**Winner (SimBow-primary)**|47.22 ±0.00|\n",
-    "2017-test|wmd-relax|44.83 ±16.54|0.38 ±0.08\n",
-    "2017-test|softcossim|44.44 ±16.07|5.17 ±0.83\n",
-    "2017-test|cossim|44.38 ±14.71|0.27 ±0.04\n",
-    "2017-test|wmd-gensim|43.85 ±15.53|15.36 ±1.92\n",
+    "2017-test|wmd-relax|45.04 ±15.44|0.39 ±0.07\n",
+    "2017-test|cossim|44.38 ±14.71|0.29 ±0.05\n",
+    "2017-test|softcossim|44.25 ±15.68|4.89 ±0.80\n",
+    "2017-test|wmd-gensim|44.08 ±15.96|16.69 ±1.90\n",
     "2017-test|**Baseline 1 (IR)**|41.85 ±0.00|\n",
     "2017-test|**Baseline 2 (random)**|29.81 ±0.00|"
   ],
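For readers skimming the tables above, the following sketch illustrates what the softcossim strategy computes: term similarities derived from word embeddings are packed into a SparseTermSimilarityMatrix, which gensim's SoftCosineSimilarity index then uses to score a query against a corpus. This is a minimal sketch under stated assumptions, not the notebook's evaluation harness; the embedding model name and the toy sentences are illustrative only, not the SemEval data.

```python
# Minimal soft cosine retrieval sketch (illustrative only; not the benchmark harness).
import gensim.downloader as api
from gensim.corpora import Dictionary
from gensim.similarities import (
    SoftCosineSimilarity,
    SparseTermSimilarityMatrix,
    WordEmbeddingSimilarityIndex,
)

# Toy corpus and query; the benchmark itself uses the SemEval 2016/2017 CQA datasets.
documents = [
    "how do i renew my residence permit".split(),
    "where can i open a bank account in doha".split(),
    "which schools would you recommend in qatar".split(),
]
query = "renewing an expired residence permit".split()

embeddings = api.load("glove-wiki-gigaword-50")  # assumed embedding model; any KeyedVectors work
dictionary = Dictionary(documents + [query])
termsim_index = WordEmbeddingSimilarityIndex(embeddings)
termsim_matrix = SparseTermSimilarityMatrix(termsim_index, dictionary, nonzero_limit=100)

bow_corpus = [dictionary.doc2bow(document) for document in documents]
docsim_index = SoftCosineSimilarity(bow_corpus, termsim_matrix)
print(docsim_index[dictionary.doc2bow(query)])  # soft cosine similarity of the query to each document
```

The nonzero_limit parameter here is the same knob whose effect on construction time and matrix density is measured earlier in this notebook; raising it makes the term similarity matrix denser and the scores closer to an unconstrained soft cosine, at the cost of memory and build time.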