diff --git a/docs/notebooks/doc2vec-lee.ipynb b/docs/notebooks/doc2vec-lee.ipynb index fd3d1702dc..cc6279fe86 100644 --- a/docs/notebooks/doc2vec-lee.ipynb +++ b/docs/notebooks/doc2vec-lee.ipynb @@ -138,8 +138,8 @@ { "data": { "text/plain": [ - "[TaggedDocument(words=['hundreds', 'of', 'people', 'have', 'been', 'forced', 'to', 'vacate', 'their', 'homes', 'in', 'the', 'southern', 'highlands', 'of', 'new', 'south', 'wales', 'as', 'strong', 'winds', 'today', 'pushed', 'huge', 'bushfire', 'towards', 'the', 'town', 'of', 'hill', 'top', 'new', 'blaze', 'near', 'goulburn', 'south', 'west', 'of', 'sydney', 'has', 'forced', 'the', 'closure', 'of', 'the', 'hume', 'highway', 'at', 'about', 'pm', 'aedt', 'marked', 'deterioration', 'in', 'the', 'weather', 'as', 'storm', 'cell', 'moved', 'east', 'across', 'the', 'blue', 'mountains', 'forced', 'authorities', 'to', 'make', 'decision', 'to', 'evacuate', 'people', 'from', 'homes', 'in', 'outlying', 'streets', 'at', 'hill', 'top', 'in', 'the', 'new', 'south', 'wales', 'southern', 'highlands', 'an', 'estimated', 'residents', 'have', 'left', 'their', 'homes', 'for', 'nearby', 'mittagong', 'the', 'new', 'south', 'wales', 'rural', 'fire', 'service', 'says', 'the', 'weather', 'conditions', 'which', 'caused', 'the', 'fire', 'to', 'burn', 'in', 'finger', 'formation', 'have', 'now', 'eased', 'and', 'about', 'fire', 'units', 'in', 'and', 'around', 'hill', 'top', 'are', 'optimistic', 'of', 'defending', 'all', 'properties', 'as', 'more', 'than', 'blazes', 'burn', 'on', 'new', 'year', 'eve', 'in', 'new', 'south', 'wales', 'fire', 'crews', 'have', 'been', 'called', 'to', 'new', 'fire', 'at', 'gunning', 'south', 'of', 'goulburn', 'while', 'few', 'details', 'are', 'available', 'at', 'this', 'stage', 'fire', 'authorities', 'says', 'it', 'has', 'closed', 'the', 'hume', 'highway', 'in', 'both', 'directions', 'meanwhile', 'new', 'fire', 'in', 'sydney', 'west', 'is', 'no', 'longer', 'threatening', 'properties', 'in', 'the', 'cranebrook', 'area', 'rain', 'has', 'fallen', 'in', 'some', 'parts', 'of', 'the', 'illawarra', 'sydney', 'the', 'hunter', 'valley', 'and', 'the', 'north', 'coast', 'but', 'the', 'bureau', 'of', 'meteorology', 'claire', 'richards', 'says', 'the', 'rain', 'has', 'done', 'little', 'to', 'ease', 'any', 'of', 'the', 'hundred', 'fires', 'still', 'burning', 'across', 'the', 'state', 'the', 'falls', 'have', 'been', 'quite', 'isolated', 'in', 'those', 'areas', 'and', 'generally', 'the', 'falls', 'have', 'been', 'less', 'than', 'about', 'five', 'millimetres', 'she', 'said', 'in', 'some', 'places', 'really', 'not', 'significant', 'at', 'all', 'less', 'than', 'millimetre', 'so', 'there', 'hasn', 'been', 'much', 'relief', 'as', 'far', 'as', 'rain', 'is', 'concerned', 'in', 'fact', 'they', 've', 'probably', 'hampered', 'the', 'efforts', 'of', 'the', 'firefighters', 'more', 'because', 'of', 'the', 'wind', 'gusts', 'that', 'are', 'associated', 'with', 'those', 'thunderstorms'], tags=[0]),\n", - " TaggedDocument(words=['indian', 'security', 'forces', 'have', 'shot', 'dead', 'eight', 'suspected', 'militants', 'in', 'night', 'long', 'encounter', 'in', 'southern', 'kashmir', 'the', 'shootout', 'took', 'place', 'at', 'dora', 'village', 'some', 'kilometers', 'south', 'of', 'the', 'kashmiri', 'summer', 'capital', 'srinagar', 'the', 'deaths', 'came', 'as', 'pakistani', 'police', 'arrested', 'more', 'than', 'two', 'dozen', 'militants', 'from', 'extremist', 'groups', 'accused', 'of', 'staging', 'an', 'attack', 'on', 'india', 'parliament', 'india', 'has', 'accused', 'pakistan', 'based', 'lashkar', 'taiba', 'and', 'jaish', 'mohammad', 'of', 'carrying', 'out', 'the', 'attack', 'on', 'december', 'at', 'the', 'behest', 'of', 'pakistani', 'military', 'intelligence', 'military', 'tensions', 'have', 'soared', 'since', 'the', 'raid', 'with', 'both', 'sides', 'massing', 'troops', 'along', 'their', 'border', 'and', 'trading', 'tit', 'for', 'tat', 'diplomatic', 'sanctions', 'yesterday', 'pakistan', 'announced', 'it', 'had', 'arrested', 'lashkar', 'taiba', 'chief', 'hafiz', 'mohammed', 'saeed', 'police', 'in', 'karachi', 'say', 'it', 'is', 'likely', 'more', 'raids', 'will', 'be', 'launched', 'against', 'the', 'two', 'groups', 'as', 'well', 'as', 'other', 'militant', 'organisations', 'accused', 'of', 'targetting', 'india', 'military', 'tensions', 'between', 'india', 'and', 'pakistan', 'have', 'escalated', 'to', 'level', 'not', 'seen', 'since', 'their', 'war'], tags=[1])]" + "[TaggedDocument(words=[u'hundreds', u'of', u'people', u'have', u'been', u'forced', u'to', u'vacate', u'their', u'homes', u'in', u'the', u'southern', u'highlands', u'of', u'new', u'south', u'wales', u'as', u'strong', u'winds', u'today', u'pushed', u'huge', u'bushfire', u'towards', u'the', u'town', u'of', u'hill', u'top', u'new', u'blaze', u'near', u'goulburn', u'south', u'west', u'of', u'sydney', u'has', u'forced', u'the', u'closure', u'of', u'the', u'hume', u'highway', u'at', u'about', u'pm', u'aedt', u'marked', u'deterioration', u'in', u'the', u'weather', u'as', u'storm', u'cell', u'moved', u'east', u'across', u'the', u'blue', u'mountains', u'forced', u'authorities', u'to', u'make', u'decision', u'to', u'evacuate', u'people', u'from', u'homes', u'in', u'outlying', u'streets', u'at', u'hill', u'top', u'in', u'the', u'new', u'south', u'wales', u'southern', u'highlands', u'an', u'estimated', u'residents', u'have', u'left', u'their', u'homes', u'for', u'nearby', u'mittagong', u'the', u'new', u'south', u'wales', u'rural', u'fire', u'service', u'says', u'the', u'weather', u'conditions', u'which', u'caused', u'the', u'fire', u'to', u'burn', u'in', u'finger', u'formation', u'have', u'now', u'eased', u'and', u'about', u'fire', u'units', u'in', u'and', u'around', u'hill', u'top', u'are', u'optimistic', u'of', u'defending', u'all', u'properties', u'as', u'more', u'than', u'blazes', u'burn', u'on', u'new', u'year', u'eve', u'in', u'new', u'south', u'wales', u'fire', u'crews', u'have', u'been', u'called', u'to', u'new', u'fire', u'at', u'gunning', u'south', u'of', u'goulburn', u'while', u'few', u'details', u'are', u'available', u'at', u'this', u'stage', u'fire', u'authorities', u'says', u'it', u'has', u'closed', u'the', u'hume', u'highway', u'in', u'both', u'directions', u'meanwhile', u'new', u'fire', u'in', u'sydney', u'west', u'is', u'no', u'longer', u'threatening', u'properties', u'in', u'the', u'cranebrook', u'area', u'rain', u'has', u'fallen', u'in', u'some', u'parts', u'of', u'the', u'illawarra', u'sydney', u'the', u'hunter', u'valley', u'and', u'the', u'north', u'coast', u'but', u'the', u'bureau', u'of', u'meteorology', u'claire', u'richards', u'says', u'the', u'rain', u'has', u'done', u'little', u'to', u'ease', u'any', u'of', u'the', u'hundred', u'fires', u'still', u'burning', u'across', u'the', u'state', u'the', u'falls', u'have', u'been', u'quite', u'isolated', u'in', u'those', u'areas', u'and', u'generally', u'the', u'falls', u'have', u'been', u'less', u'than', u'about', u'five', u'millimetres', u'she', u'said', u'in', u'some', u'places', u'really', u'not', u'significant', u'at', u'all', u'less', u'than', u'millimetre', u'so', u'there', u'hasn', u'been', u'much', u'relief', u'as', u'far', u'as', u'rain', u'is', u'concerned', u'in', u'fact', u'they', u've', u'probably', u'hampered', u'the', u'efforts', u'of', u'the', u'firefighters', u'more', u'because', u'of', u'the', u'wind', u'gusts', u'that', u'are', u'associated', u'with', u'those', u'thunderstorms'], tags=[0]),\n", + " TaggedDocument(words=[u'indian', u'security', u'forces', u'have', u'shot', u'dead', u'eight', u'suspected', u'militants', u'in', u'night', u'long', u'encounter', u'in', u'southern', u'kashmir', u'the', u'shootout', u'took', u'place', u'at', u'dora', u'village', u'some', u'kilometers', u'south', u'of', u'the', u'kashmiri', u'summer', u'capital', u'srinagar', u'the', u'deaths', u'came', u'as', u'pakistani', u'police', u'arrested', u'more', u'than', u'two', u'dozen', u'militants', u'from', u'extremist', u'groups', u'accused', u'of', u'staging', u'an', u'attack', u'on', u'india', u'parliament', u'india', u'has', u'accused', u'pakistan', u'based', u'lashkar', u'taiba', u'and', u'jaish', u'mohammad', u'of', u'carrying', u'out', u'the', u'attack', u'on', u'december', u'at', u'the', u'behest', u'of', u'pakistani', u'military', u'intelligence', u'military', u'tensions', u'have', u'soared', u'since', u'the', u'raid', u'with', u'both', u'sides', u'massing', u'troops', u'along', u'their', u'border', u'and', u'trading', u'tit', u'for', u'tat', u'diplomatic', u'sanctions', u'yesterday', u'pakistan', u'announced', u'it', u'had', u'arrested', u'lashkar', u'taiba', u'chief', u'hafiz', u'mohammed', u'saeed', u'police', u'in', u'karachi', u'say', u'it', u'is', u'likely', u'more', u'raids', u'will', u'be', u'launched', u'against', u'the', u'two', u'groups', u'as', u'well', u'as', u'other', u'militant', u'organisations', u'accused', u'of', u'targetting', u'india', u'military', u'tensions', u'between', u'india', u'and', u'pakistan', u'have', u'escalated', u'to', u'level', u'not', u'seen', u'since', u'their', u'war'], tags=[1])]" ] }, "execution_count": 5, @@ -169,7 +169,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[['the', 'national', 'executive', 'of', 'the', 'strife', 'torn', 'democrats', 'last', 'night', 'appointed', 'little', 'known', 'west', 'australian', 'senator', 'brian', 'greig', 'as', 'interim', 'leader', 'shock', 'move', 'likely', 'to', 'provoke', 'further', 'conflict', 'between', 'the', 'party', 'senators', 'and', 'its', 'organisation', 'in', 'move', 'to', 'reassert', 'control', 'over', 'the', 'party', 'seven', 'senators', 'the', 'national', 'executive', 'last', 'night', 'rejected', 'aden', 'ridgeway', 'bid', 'to', 'become', 'interim', 'leader', 'in', 'favour', 'of', 'senator', 'greig', 'supporter', 'of', 'deposed', 'leader', 'natasha', 'stott', 'despoja', 'and', 'an', 'outspoken', 'gay', 'rights', 'activist'], ['cash', 'strapped', 'financial', 'services', 'group', 'amp', 'has', 'shelved', 'million', 'plan', 'to', 'buy', 'shares', 'back', 'from', 'investors', 'and', 'will', 'raise', 'million', 'in', 'fresh', 'capital', 'after', 'profits', 'crashed', 'in', 'the', 'six', 'months', 'to', 'june', 'chief', 'executive', 'paul', 'batchelor', 'said', 'the', 'result', 'was', 'solid', 'in', 'what', 'he', 'described', 'as', 'the', 'worst', 'conditions', 'for', 'stock', 'markets', 'in', 'years', 'amp', 'half', 'year', 'profit', 'sank', 'per', 'cent', 'to', 'million', 'or', 'share', 'as', 'australia', 'largest', 'investor', 'and', 'fund', 'manager', 'failed', 'to', 'hit', 'projected', 'per', 'cent', 'earnings', 'growth', 'targets', 'and', 'was', 'battered', 'by', 'falling', 'returns', 'on', 'share', 'markets']]\n" + "[[u'the', u'national', u'executive', u'of', u'the', u'strife', u'torn', u'democrats', u'last', u'night', u'appointed', u'little', u'known', u'west', u'australian', u'senator', u'brian', u'greig', u'as', u'interim', u'leader', u'shock', u'move', u'likely', u'to', u'provoke', u'further', u'conflict', u'between', u'the', u'party', u'senators', u'and', u'its', u'organisation', u'in', u'move', u'to', u'reassert', u'control', u'over', u'the', u'party', u'seven', u'senators', u'the', u'national', u'executive', u'last', u'night', u'rejected', u'aden', u'ridgeway', u'bid', u'to', u'become', u'interim', u'leader', u'in', u'favour', u'of', u'senator', u'greig', u'supporter', u'of', u'deposed', u'leader', u'natasha', u'stott', u'despoja', u'and', u'an', u'outspoken', u'gay', u'rights', u'activist'], [u'cash', u'strapped', u'financial', u'services', u'group', u'amp', u'has', u'shelved', u'million', u'plan', u'to', u'buy', u'shares', u'back', u'from', u'investors', u'and', u'will', u'raise', u'million', u'in', u'fresh', u'capital', u'after', u'profits', u'crashed', u'in', u'the', u'six', u'months', u'to', u'june', u'chief', u'executive', u'paul', u'batchelor', u'said', u'the', u'result', u'was', u'solid', u'in', u'what', u'he', u'described', u'as', u'the', u'worst', u'conditions', u'for', u'stock', u'markets', u'in', u'years', u'amp', u'half', u'year', u'profit', u'sank', u'per', u'cent', u'to', u'million', u'or', u'share', u'as', u'australia', u'largest', u'investor', u'and', u'fund', u'manager', u'failed', u'to', u'hit', u'projected', u'per', u'cent', u'earnings', u'growth', u'targets', u'and', u'was', u'battered', u'by', u'falling', u'returns', u'on', u'share', u'markets']]\n" ] } ], @@ -202,7 +202,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now, we'll instantiate a Doc2Vec model with a vector size with 50 words and iterating over the training corpus 10 times. We set the minimum word count to 2 in order to give higher frequency words more weighting. Model accuracy can be improved by increasing the number of iterations but this generally increases the training time." + "Now, we'll instantiate a Doc2Vec model with a vector size with 50 words and iterating over the training corpus 55 times. We set the minimum word count to 2 in order to give higher frequency words more weighting. Model accuracy can be improved by increasing the number of iterations but this generally increases the training time. Small datasets with short documents, like this one, can benefit from more training passes." ] }, { @@ -213,7 +213,7 @@ }, "outputs": [], "source": [ - "model = gensim.models.doc2vec.Doc2Vec(size=50, min_count=2, iter=10)" + "model = gensim.models.doc2vec.Doc2Vec(size=50, min_count=2, iter=55)" ] }, { @@ -247,7 +247,7 @@ "source": [ "### Time to Train\n", "\n", - "If the BLAS library is being used, this should take no more than 2 seconds.\n", + "If the BLAS library is being used, this should take no more than 3 seconds.\n", "If the BLAS library is not being used, this should take no more than 2 minutes, so use BLAS if you value your time." ] }, @@ -262,14 +262,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.03 s, sys: 4.04 ms, total: 1.03 s\n", - "Wall time: 1.02 s\n" + "CPU times: user 6.34 s, sys: 109 ms, total: 6.45 s\n", + "Wall time: 2.58 s\n" ] }, { "data": { "text/plain": [ - "554260" + "2347665" ] }, "execution_count": 9, @@ -305,16 +305,16 @@ { "data": { "text/plain": [ - "array([ 0.01498613, -0.04319233, -0.09383711, -0.11391422, 0.01824722,\n", - " -0.08820333, -0.10123429, -0.04619413, 0.06180811, 0.00963261,\n", - " 0.01016402, -0.04718231, 0.03931789, 0.00525723, 0.03888206,\n", - " -0.03022155, 0.05980202, -0.07421142, -0.00868064, 0.06489716,\n", - " -0.03265649, -0.01281992, 0.18542686, -0.03033197, 0.0381287 ,\n", - " -0.05379254, -0.02662263, -0.07465494, -0.04721382, -0.01964734,\n", - " 0.02553383, -0.03766384, 0.02129765, 0.11965464, -0.03847834,\n", - " 0.01352374, 0.06914927, -0.02476629, 0.0576441 , -0.04563627,\n", - " 0.05643371, 0.031572 , 0.01302914, 0.01272549, 0.09680226,\n", - " -0.00038328, 0.04555022, 0.04019492, -0.06818081, -0.02148021], dtype=float32)" + "array([ 0.02664499, 0.00475204, -0.03981256, 0.03796276, -0.03206162,\n", + " 0.10963056, -0.04897128, 0.00151982, -0.03258783, 0.04711508,\n", + " -0.00667155, -0.08523653, -0.02975186, 0.00166316, 0.01915652,\n", + " -0.03415785, -0.05794788, 0.05110953, 0.01623618, -0.00512495,\n", + " -0.06385455, -0.0151557 , 0.00365376, 0.03015811, 0.0229462 ,\n", + " 0.03176891, 0.01117626, -0.00743352, 0.02030453, -0.05072152,\n", + " -0.00498496, 0.00151227, 0.06122205, -0.01811385, -0.01715777,\n", + " 0.04883198, 0.03925886, -0.03568915, 0.00805744, 0.01654406,\n", + " -0.05160677, 0.0119908 , -0.01527433, 0.02209963, -0.10316766,\n", + " -0.01069367, -0.02432527, 0.00761799, 0.02763799, -0.04288232], dtype=float32)" ] }, "execution_count": 10, @@ -377,7 +377,7 @@ { "data": { "text/plain": [ - "Counter({0: 292, 1: 8})" + "Counter({0: 289, 1: 11})" ] }, "execution_count": 12, @@ -393,12 +393,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Basically, greater than 95% of the inferred documents are found to be most similar to itself and about 5% of the time it is mistakenly most similar to another document. This is great and not entirely surprising. We can take a look at an example:" + "Basically, greater than 95% of the inferred documents are found to be most similar to itself and about 5% of the time it is mistakenly most similar to another document. the checking of an inferred-vector against a training-vector is a sort of 'sanity check' as to whether the model is behaving in a usefully consistent manner, though not a real 'accuracy' value.\n", + "\n", + "This is great and not entirely surprising. We can take a look at an example:" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": { "collapsed": false }, @@ -409,13 +411,13 @@ "text": [ "Document (299): «australia will take on france in the doubles rubber of the davis cup tennis final today with the tie levelled at wayne arthurs and todd woodbridge are scheduled to lead australia in the doubles against cedric pioline and fabrice santoro however changes can be made to the line up up to an hour before the match and australian team captain john fitzgerald suggested he might do just that we ll make team appraisal of the whole situation go over the pros and cons and make decision french team captain guy forget says he will not make changes but does not know what to expect from australia todd is the best doubles player in the world right now so expect him to play he said would probably use wayne arthurs but don know what to expect really pat rafter salvaged australia davis cup campaign yesterday with win in the second singles match rafter overcame an arm injury to defeat french number one sebastien grosjean in three sets the australian says he is happy with his form it not very pretty tennis there isn too many consistent bounces you are playing like said bit of classic old grass court rafter said rafter levelled the score after lleyton hewitt shock five set loss to nicholas escude in the first singles rubber but rafter says he felt no added pressure after hewitt defeat knew had good team to back me up even if we were down he said knew could win on the last day know the boys can win doubles so even if we were down still feel we are good enough team to win and vice versa they are good enough team to beat us as well»\n", "\n", - "SIMILAR/DISSIMILAR DOCS PER MODEL Doc2Vec(dm/s,d50,hs,w8,mc2):\n", + "SIMILAR/DISSIMILAR DOCS PER MODEL Doc2Vec(dm/m,d50,n5,w5,mc2,s0.001,t3):\n", "\n", - "MOST (299, 0.8740596175193787): «australia will take on france in the doubles rubber of the davis cup tennis final today with the tie levelled at wayne arthurs and todd woodbridge are scheduled to lead australia in the doubles against cedric pioline and fabrice santoro however changes can be made to the line up up to an hour before the match and australian team captain john fitzgerald suggested he might do just that we ll make team appraisal of the whole situation go over the pros and cons and make decision french team captain guy forget says he will not make changes but does not know what to expect from australia todd is the best doubles player in the world right now so expect him to play he said would probably use wayne arthurs but don know what to expect really pat rafter salvaged australia davis cup campaign yesterday with win in the second singles match rafter overcame an arm injury to defeat french number one sebastien grosjean in three sets the australian says he is happy with his form it not very pretty tennis there isn too many consistent bounces you are playing like said bit of classic old grass court rafter said rafter levelled the score after lleyton hewitt shock five set loss to nicholas escude in the first singles rubber but rafter says he felt no added pressure after hewitt defeat knew had good team to back me up even if we were down he said knew could win on the last day know the boys can win doubles so even if we were down still feel we are good enough team to win and vice versa they are good enough team to beat us as well»\n", + "MOST (83, 0.9967287182807922): «the opposition leader simon crean says child abuse scandal in brisbane has damaged the office of the governor general and its incumbent dr peter hollingworth child advocates have called on dr hollingworth to step down as governor general saying he did not do enough to prevent abuse of children in an anglican school when he was archbishop of brisbane mr crean says he is not calling on dr hollingworth to resign but he says there are still unanswered questions think it has tarnished the office of the governor general the fact that it took so long for this statement to come out he said many people have been calling for it me included think if we are to avoid further damage to the office we need to clear it up completely brisbane lord mayor says the governor general explanation of his handling of child sex abuse allegations at queensland school raises more questions than it answers jim soorley who is former catholic priest says the explanation does not wash within the christian tradition bishops are regarded as shepherds he said it very clear that he was not good shepherd and there are serious consequences for that think his actions are not the actions of good shepherd and think there are still questions to be answered»\n", "\n", - "MEDIAN (293, 0.035127244889736176): «george harrison the guitarist songwriter and film producer was widely known as the quiet beatle as the youngest beatle he had to be snuck in underage to venues prior to the band phenomenal success in the early he was responsible for some of the band classic songs such as taxman here comes the sun and something but up against the genius of paul mccartney and john lennon his songs were hard pressed to make it onto vinyl resentment built up and harrison withdrew from the limelight after the beatles broke up he found solo success in with the track my sweet lord although he was successfully sued for plagiarism and had to pay out half million pounds in damages always against beatles reformation he famously declared in that the band would reform when lennon was no longer dead in later years he was beset by lung and throat cancer he was lucky to survive stabbing by an intruder in his uk home in he was known for his love of eastern mysticism motor racing and his second wife olivia who saved his life in the knife attack»\n", + "MEDIAN (252, 0.9926513433456421): «the labor party is set to have wide ranging review of its structures with frontbencher martin ferguson pushing for the process the new labor leader simon crean is taking set of proposals to next thursday national executive meeting mr ferguson wants the meeting to call review he says suggestions for party changes such as the call by frontbencher joel fitzgibbon for the scrapping of new south wales rule forcing labor members to belong to union should be dealt with internally perhaps the time has come for us to actually sign up to federal executive process which actually enables debate to go forward in highly constructive way rather than individual proposals being put out there without an end game in sight mr ferguson said he says he is happy for the process to include looking at abandoning the rule but says scrapping the rule would only be minor factor in the party self examination for long time ve believed there is requirement for the labor party to actually have hard look at whether or not an archaic formula of union representation is the key to our future»\n", "\n", - "LEAST (14, -0.5578939914703369): «the israeli army has killed three palestinian militants who attacked one of its armoured vehicles in the northern gaza strip the three palestinians opened fire with rifles at the vehicle between the jewish settlements of alei sinai and nitzanit on the northern edge of the gaza strip they were killed by shell fire from tank the sources said during the fire fight an israeli army observation post called in tank fire which killed the three gunmen the killing brings the death toll of months of the palestinian uprising against israeli occupation to people including palestinians and israelis alei sinai was attacked on october when two gunmen from the radical islamic group hamas infiltrated the settlement and opened fire on the residents killing two teenage israelis two palestinian gunmen also killed an israeli settler in alei sinai on december before being killed by the army»\n", + "LEAST (250, 0.9795532822608948): «israel launched massive air raids across the west bank and gaza tuesday piling pressure on yasser arafat with rocket strike on police post next to his offices after prime minister ariel sharon branded his administration sponsor of terrorism israeli warplanes launched series of strikes on gaza city while apache helicopters fired rockets on palestinian security offices in khan yunis in the southern gaza strip and on the west bank towns of salfit and tulkarem they also fired missiles on security post just metres from mr arafat offices in ramallah but the palestinian leader who was in his office at the time was unhurt but two policemen were slightly wounded officials said israeli army spokesman brigadier general ron kitrey said mr arafat was not targeted two people were killed in the gaza strikes and around injured half of them schoolboys palestinian hospital officials said the attacks came as israel foreign minister shimon peres said he did not believe israeli forces would take direct action against the palestinian leader the strikes also came day after mr sharon furious that mr arafat had not stopped hardline islamic groups who killed two dozen israelis in devastating suicide attacks at the weekend ordered his forces to blast symbols of mr arafat power gunships destroyed mr arafat three helicopters in gaza city while bulldozers ploughed up the runway at gaza international airport used by mr arafat for his frequent travels abroad palestinian officials called mr sharon campaign an attempt to topple mr arafat and destroy his self rule palestinian authority mr arafat told cnn television that mr sharon was trying to torpedo his own crackdown on terrorism with the airstrikes he doesn want me to succeed and for this he is escalating his military activities against our towns our cities our establishments the palestinian leader said french foreign minister hubert vedrine accused israel of conducting deliberate policy aimed at eliminating mr arafat arafat has been weakened by the harassment of the israeli army and as result people are using his weakness as an argument to say that since he can not re establish order in his own camp he should in some way be eliminated however britain prime minister tony blair and us president george bush expressed sympathy with israel and called on all sides to do anything they can to stabilise the situation mr sharon hard words and air strikes opened major divisions in his cross party government with left wing mr peres denouncing what he called bid during monday emergency cabinet meeting to cause the downfall of the palestinian authority the region had been braced for huge israeli retaliation after three palestinian suicide bombers from the hardline islamic movement hamas killed people on saturday and sunday in the suicide attacks in jerusalem and haifa mr sharon made national address after blasting gaza city and jenin in the west bank on monday accusing mr arafat of having chosen the path of terrorism and being the greatest obstacle to peace and stability in the middle east mr peres said the move by mr sharon dominant right wingers in effect means israeli policy is based purely on force with no political hope public radio said mr peres had called all the ministers from his labour party for special meeting wednesday to discuss the fallout of the strikes and mr sharon accusation that mr arafat was responsible for everything that has happened here chief palestinian negotiator saeb erakat speaking after mr sharon speech monday evening said the words amounted to declaration of war he called on the united states and europe to rein in mr sharon and dispatch international observers to oversee the spiralling conflict»\n", "\n" ] } @@ -525,21 +527,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 2", "language": "python", - "name": "python3" + "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 3 + "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" + "pygments_lexer": "ipython2", + "version": "2.7.12" } }, "nbformat": 4,