diff --git a/DBSCAN.ipynb b/DBSCAN.ipynb new file mode 100644 index 0000000..9cee4c7 --- /dev/null +++ b/DBSCAN.ipynb @@ -0,0 +1,1986 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## DBSCAN (Density-Based Spatial Clustering of Applications with Noise)\n", + "\n", + "\n", + "Find core samples of high density and expand clusters from them.\n", + "\n", + "The minimum number of samples in a neighborhood for a point to be considered as a core point was set at 10 and the maximum distance between two samples within the same neighborhood was set at 0.1.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FilenameEINProgram_ExpLiabilities_To_AssetWorking_CapitalSurplus_MarginTotal_Expenses
0201523229349300327.xml510311790.00.9896190.0918021.574677-0.0786634.643180e+05
1201543089349301829.xml261460932.00.9653780.0000003.910347-0.0426742.743900e+04
2201533179349306298.xml270609504.00.9422760.0492060.6551520.0885973.848280e+05
3201533209349304768.xml521548962.00.3845260.00081430.3927560.6331265.091200e+04
4201533179349307343.xml731653383.00.6205600.0294871.0837390.1059012.812300e+04
5201533189349300608.xml237324566.00.7589020.736982-0.176215-0.0039146.081260e+05
6201523069349301367.xml43259150.00.7363120.1819980.3785920.0575342.129680e+05
7201533069349300963.xml621273871.00.6448220.23424714.4131630.4961221.426413e+06
8201523099349300542.xml541897455.00.8456590.1385310.414986-0.0167694.615120e+05
9201533099349301033.xml251869168.00.7623570.0000008.6739840.1222441.020440e+05
10201523169349304367.xml376046335.00.7453860.00000016.4639760.2525793.717000e+04
11201533099349301803.xml50454409.00.9152610.0000001.1587680.0626495.573470e+05
12201523069349300142.xml272364809.00.9203070.0036060.240597-0.0296633.020610e+05
13201543039349301204.xml463812139.00.6640880.6484130.0458230.0888121.399740e+05
14201523089349301462.xml561797737.00.7046230.4326710.000000-0.0503842.147730e+05
15201533069349300788.xml590638867.00.0000000.0000000.360157-0.0558837.625220e+05
16201533079349300238.xml273314429.01.0000000.0000000.0000000.1075842.617100e+04
17201523209349314257.xml710801566.01.0000000.0000000.000000-8.2123371.145333e+06
18201523209349311332.xml752204594.00.8968950.0697150.535678-0.0641003.889691e+06
19201533179349302173.xml251374594.00.0000000.3136300.3375370.0148094.235120e+05
20201533179349307048.xml450448773.00.9912050.0513480.4149300.1615521.858940e+05
21201533209349302633.xml363789851.00.0000000.2320110.521133-0.1289977.329143e+06
22201533099349301113.xml411712804.00.4776970.0098870.751388-0.0145078.147200e+05
23201523039349300127.xml381557861.00.8321590.0638342.9881200.1709111.369609e+06
24201523079349301652.xml273741627.00.9451200.0000000.8636630.0949851.234798e+06
25201533039349300813.xml411495321.00.6681610.040796144.508488-0.1397771.108143e+06
26201533139349300208.xml60666277.00.0000000.6449040.654848-0.2961431.964943e+07
27201533069349301413.xml42616064.00.9512180.3386830.1785170.0411233.325420e+07
28201533079349300003.xml581651220.00.9103470.0268320.945357-0.0202933.247970e+05
29201523069349300957.xml341496171.00.8506170.0688381.1257120.2128996.727750e+05
........................
38884201533099349301698.xml912130056.00.7608280.6106840.104169-2.1635897.340370e+05
38885201543119349300204.xml237198698.00.8199530.5491090.2484790.0603473.518240e+05
38886201543139349302814.xml462250934.00.1316360.9608630.0014250.0053472.279017e+06
38887201533219349301213.xml231416559.00.5987600.8024160.248929-0.2074278.819110e+05
38888201543179349306629.xml223128187.00.6211040.0405431.574808-0.1333046.866690e+05
38889201533209349316263.xml731585237.00.8495840.1330370.508528-0.0541462.441360e+05
38890201543079349301044.xml581736427.00.8872241.390273-1.0144150.0529911.413780e+05
38891201533139349300123.xml470841633.00.7308780.2379176.7960360.6154548.076930e+05
38892201543179349305429.xml943345498.00.5703440.0310223.1967370.1460155.820010e+05
38893201543179349308719.xml264795329.00.9848821.067790-0.013730-0.0136016.089029e+06
38894201533179349302373.xml262709818.01.0000001.0000000.0000000.0000003.369300e+04
38895201620149349300127.xml431129770.00.8689050.2033440.8189790.0413411.113451e+07
38896201630129349300803.xml592240502.00.8582380.0905671.5522810.1657166.398019e+06
38897201620119349300422.xml61462359.00.8430570.0873440.250055-0.0862866.898120e+05
38898201630129349300723.xml630652760.00.9928920.3019880.1334940.0001013.599537e+06
38899201610139349300811.xml341472960.01.0000001.0000000.0000000.0000002.762930e+05
38900201600199349300500.xml800182020.00.5948380.1419280.037224-0.1049801.481881e+06
38901201600399349300955.xml462754433.00.0000000.0000000.2352230.0957124.060960e+05
38902201600369349301750.xml742744885.00.9752190.0000000.452366-0.0453732.315450e+05
38903201610399349300226.xml471127421.00.1380380.0000002.0975512.1635481.495530e+05
38904201620359349300612.xml742244155.00.9011250.5599780.0680620.0089632.399374e+07
38905201610429349302361.xml940764760.00.9922750.7885090.0348230.0142755.107870e+05
38906201610369349301226.xml840588263.00.9889250.0808600.3132220.1663712.257280e+05
38907201640359349300129.xml20650275.00.6812100.4983410.2138520.0385973.040090e+05
38908201600429349301735.xml942418861.00.9877890.0155953.4295930.5268832.165501e+06
38909201600359349302005.xml223201871.00.9079860.0000003.0523420.2813922.319760e+05
38910201600419349301130.xml650599763.00.9555814.173882-0.4140360.0581411.489220e+05
38911201640359349301614.xml470461460.00.4652350.0140511.6668840.1821972.236610e+05
38912201600429349302135.xml990208381.00.8523440.0479280.8158710.2293521.879170e+05
38913201610229349300741.xml510216586.00.5219380.7466250.5294890.0265074.834544e+09
\n", + "

38657 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " Filename EIN Program_Exp Liabilities_To_Asset \\\n", + "0 201523229349300327.xml 510311790.0 0.989619 0.091802 \n", + "1 201543089349301829.xml 261460932.0 0.965378 0.000000 \n", + "2 201533179349306298.xml 270609504.0 0.942276 0.049206 \n", + "3 201533209349304768.xml 521548962.0 0.384526 0.000814 \n", + "4 201533179349307343.xml 731653383.0 0.620560 0.029487 \n", + "5 201533189349300608.xml 237324566.0 0.758902 0.736982 \n", + "6 201523069349301367.xml 43259150.0 0.736312 0.181998 \n", + "7 201533069349300963.xml 621273871.0 0.644822 0.234247 \n", + "8 201523099349300542.xml 541897455.0 0.845659 0.138531 \n", + "9 201533099349301033.xml 251869168.0 0.762357 0.000000 \n", + "10 201523169349304367.xml 376046335.0 0.745386 0.000000 \n", + "11 201533099349301803.xml 50454409.0 0.915261 0.000000 \n", + "12 201523069349300142.xml 272364809.0 0.920307 0.003606 \n", + "13 201543039349301204.xml 463812139.0 0.664088 0.648413 \n", + "14 201523089349301462.xml 561797737.0 0.704623 0.432671 \n", + "15 201533069349300788.xml 590638867.0 0.000000 0.000000 \n", + "16 201533079349300238.xml 273314429.0 1.000000 0.000000 \n", + "17 201523209349314257.xml 710801566.0 1.000000 0.000000 \n", + "18 201523209349311332.xml 752204594.0 0.896895 0.069715 \n", + "19 201533179349302173.xml 251374594.0 0.000000 0.313630 \n", + "20 201533179349307048.xml 450448773.0 0.991205 0.051348 \n", + "21 201533209349302633.xml 363789851.0 0.000000 0.232011 \n", + "22 201533099349301113.xml 411712804.0 0.477697 0.009887 \n", + "23 201523039349300127.xml 381557861.0 0.832159 0.063834 \n", + "24 201523079349301652.xml 273741627.0 0.945120 0.000000 \n", + "25 201533039349300813.xml 411495321.0 0.668161 0.040796 \n", + "26 201533139349300208.xml 60666277.0 0.000000 0.644904 \n", + "27 201533069349301413.xml 42616064.0 0.951218 0.338683 \n", + "28 201533079349300003.xml 581651220.0 0.910347 0.026832 \n", + "29 201523069349300957.xml 341496171.0 0.850617 0.068838 \n", + "... ... 
... ... ... \n", + "38884 201533099349301698.xml 912130056.0 0.760828 0.610684 \n", + "38885 201543119349300204.xml 237198698.0 0.819953 0.549109 \n", + "38886 201543139349302814.xml 462250934.0 0.131636 0.960863 \n", + "38887 201533219349301213.xml 231416559.0 0.598760 0.802416 \n", + "38888 201543179349306629.xml 223128187.0 0.621104 0.040543 \n", + "38889 201533209349316263.xml 731585237.0 0.849584 0.133037 \n", + "38890 201543079349301044.xml 581736427.0 0.887224 1.390273 \n", + "38891 201533139349300123.xml 470841633.0 0.730878 0.237917 \n", + "38892 201543179349305429.xml 943345498.0 0.570344 0.031022 \n", + "38893 201543179349308719.xml 264795329.0 0.984882 1.067790 \n", + "38894 201533179349302373.xml 262709818.0 1.000000 1.000000 \n", + "38895 201620149349300127.xml 431129770.0 0.868905 0.203344 \n", + "38896 201630129349300803.xml 592240502.0 0.858238 0.090567 \n", + "38897 201620119349300422.xml 61462359.0 0.843057 0.087344 \n", + "38898 201630129349300723.xml 630652760.0 0.992892 0.301988 \n", + "38899 201610139349300811.xml 341472960.0 1.000000 1.000000 \n", + "38900 201600199349300500.xml 800182020.0 0.594838 0.141928 \n", + "38901 201600399349300955.xml 462754433.0 0.000000 0.000000 \n", + "38902 201600369349301750.xml 742744885.0 0.975219 0.000000 \n", + "38903 201610399349300226.xml 471127421.0 0.138038 0.000000 \n", + "38904 201620359349300612.xml 742244155.0 0.901125 0.559978 \n", + "38905 201610429349302361.xml 940764760.0 0.992275 0.788509 \n", + "38906 201610369349301226.xml 840588263.0 0.988925 0.080860 \n", + "38907 201640359349300129.xml 20650275.0 0.681210 0.498341 \n", + "38908 201600429349301735.xml 942418861.0 0.987789 0.015595 \n", + "38909 201600359349302005.xml 223201871.0 0.907986 0.000000 \n", + "38910 201600419349301130.xml 650599763.0 0.955581 4.173882 \n", + "38911 201640359349301614.xml 470461460.0 0.465235 0.014051 \n", + "38912 201600429349302135.xml 990208381.0 0.852344 0.047928 \n", + "38913 201610229349300741.xml 
510216586.0 0.521938 0.746625 \n", + "\n", + " Working_Capital Surplus_Margin Total_Expenses \n", + "0 1.574677 -0.078663 4.643180e+05 \n", + "1 3.910347 -0.042674 2.743900e+04 \n", + "2 0.655152 0.088597 3.848280e+05 \n", + "3 30.392756 0.633126 5.091200e+04 \n", + "4 1.083739 0.105901 2.812300e+04 \n", + "5 -0.176215 -0.003914 6.081260e+05 \n", + "6 0.378592 0.057534 2.129680e+05 \n", + "7 14.413163 0.496122 1.426413e+06 \n", + "8 0.414986 -0.016769 4.615120e+05 \n", + "9 8.673984 0.122244 1.020440e+05 \n", + "10 16.463976 0.252579 3.717000e+04 \n", + "11 1.158768 0.062649 5.573470e+05 \n", + "12 0.240597 -0.029663 3.020610e+05 \n", + "13 0.045823 0.088812 1.399740e+05 \n", + "14 0.000000 -0.050384 2.147730e+05 \n", + "15 0.360157 -0.055883 7.625220e+05 \n", + "16 0.000000 0.107584 2.617100e+04 \n", + "17 0.000000 -8.212337 1.145333e+06 \n", + "18 0.535678 -0.064100 3.889691e+06 \n", + "19 0.337537 0.014809 4.235120e+05 \n", + "20 0.414930 0.161552 1.858940e+05 \n", + "21 0.521133 -0.128997 7.329143e+06 \n", + "22 0.751388 -0.014507 8.147200e+05 \n", + "23 2.988120 0.170911 1.369609e+06 \n", + "24 0.863663 0.094985 1.234798e+06 \n", + "25 144.508488 -0.139777 1.108143e+06 \n", + "26 0.654848 -0.296143 1.964943e+07 \n", + "27 0.178517 0.041123 3.325420e+07 \n", + "28 0.945357 -0.020293 3.247970e+05 \n", + "29 1.125712 0.212899 6.727750e+05 \n", + "... ... ... ... 
\n", + "38884 0.104169 -2.163589 7.340370e+05 \n", + "38885 0.248479 0.060347 3.518240e+05 \n", + "38886 0.001425 0.005347 2.279017e+06 \n", + "38887 0.248929 -0.207427 8.819110e+05 \n", + "38888 1.574808 -0.133304 6.866690e+05 \n", + "38889 0.508528 -0.054146 2.441360e+05 \n", + "38890 -1.014415 0.052991 1.413780e+05 \n", + "38891 6.796036 0.615454 8.076930e+05 \n", + "38892 3.196737 0.146015 5.820010e+05 \n", + "38893 -0.013730 -0.013601 6.089029e+06 \n", + "38894 0.000000 0.000000 3.369300e+04 \n", + "38895 0.818979 0.041341 1.113451e+07 \n", + "38896 1.552281 0.165716 6.398019e+06 \n", + "38897 0.250055 -0.086286 6.898120e+05 \n", + "38898 0.133494 0.000101 3.599537e+06 \n", + "38899 0.000000 0.000000 2.762930e+05 \n", + "38900 0.037224 -0.104980 1.481881e+06 \n", + "38901 0.235223 0.095712 4.060960e+05 \n", + "38902 0.452366 -0.045373 2.315450e+05 \n", + "38903 2.097551 2.163548 1.495530e+05 \n", + "38904 0.068062 0.008963 2.399374e+07 \n", + "38905 0.034823 0.014275 5.107870e+05 \n", + "38906 0.313222 0.166371 2.257280e+05 \n", + "38907 0.213852 0.038597 3.040090e+05 \n", + "38908 3.429593 0.526883 2.165501e+06 \n", + "38909 3.052342 0.281392 2.319760e+05 \n", + "38910 -0.414036 0.058141 1.489220e+05 \n", + "38911 1.666884 0.182197 2.236610e+05 \n", + "38912 0.815871 0.229352 1.879170e+05 \n", + "38913 0.529489 0.026507 4.834544e+09 \n", + "\n", + "[38657 rows x 7 columns]" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "\n", + "df1=pd.read_csv('team_out_1.csv')\n", + "df2=pd.read_csv('team_out_a2.csv')\n", + "\n", + "df=df1.append(df2)\n", + "df.dropna(inplace=True)\n", + "df.reset_index(inplace=True,drop=True)\n", + "df=df[df.Total_Expenses>0]\n", + "df=df[df.Program_Exp<=1]\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "small_df=df[df.Total_Expenses<1000000]\n", + "\n", + 
"med_df=df[df.Total_Expenses>1000000]\n", + "med_df=med_df[df.Total_Expenses<10000000]\n", + "\n", + "large_df=df[df.Total_Expenses<50000000]\n", + "large_df=large_df[df.Total_Expenses>10000000]\n", + "\n", + "national_df=df[df.Total_Expenses>50000000]\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "#Getting a list of positive businesses\n", + "\n", + "temp=df[df.Program_Exp>.9] \n", + "temp=temp[temp.Liabilities_To_Asset<.2]\n", + "lst_temp=list(temp['EIN'])" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Program_ExpLiabilities_To_AssetWorking_CapitalSurplus_MarginTotal_ExpensesFilename
EIN
510311790.00.9896190.0134840.4636500.9572310.000068201523229349300327.xml
261460932.00.9653780.0134790.4636570.9572310.000004201543089349301829.xml
270609504.00.9422760.0134820.4636470.9572320.000056201533179349306298.xml
521548962.00.3845260.0134790.4637400.9572340.000007201533209349304768.xml
731653383.00.6205600.0134800.4636490.9572320.000004201533179349307343.xml
237324566.00.7589020.0135220.4636450.9572310.000089201533189349300608.xml
43259150.00.7363120.0134890.4636460.9572310.000031201523069349301367.xml
621273871.00.6448220.0134920.4636900.9572330.000209201533069349300963.xml
541897455.00.8456590.0134870.4636460.9572310.000068201523099349300542.xml
251869168.00.7623570.0134790.4636720.9572320.000015201533099349301033.xml
376046335.00.7453860.0134790.4636960.9572320.000005201523169349304367.xml
50454409.00.9152610.0134790.4636490.9572310.000082201533099349301803.xml
272364809.00.9203070.0134790.4636460.9572310.000044201523069349300142.xml
463812139.00.6640880.0135170.4636450.9572320.000020201543039349301204.xml
561797737.00.7046230.0135040.4636450.9572310.000031201523089349301462.xml
590638867.00.0000000.0134790.4636460.9572310.000112201533069349300788.xml
273314429.01.0000000.0134790.4636450.9572320.000004201533079349300238.xml
710801566.01.0000000.0134790.4636450.9572000.000168201523209349314257.xml
752204594.00.8968950.0134830.4636470.9572310.000569201523209349311332.xml
251374594.00.0000000.0134970.4636460.9572310.000062201533179349302173.xml
450448773.00.9912050.0134820.4636460.9572320.000027201533179349307048.xml
363789851.00.0000000.0134920.4636470.9572310.001072201533209349302633.xml
411712804.00.4776970.0134790.4636480.9572310.000119201533099349301113.xml
381557861.00.8321590.0134820.4636540.9572320.000200201523039349300127.xml
273741627.00.9451200.0134790.4636480.9572320.000181201523079349301652.xml
411495321.00.6681610.0134810.4640950.9572310.000162201533039349300813.xml
60666277.00.0000000.0135160.4636470.9572300.002874201533139349300208.xml
42616064.00.9512180.0134990.4636460.9572310.004864201533069349301413.xml
581651220.00.9103470.0134800.4636480.9572310.000048201533079349300003.xml
341496171.00.8506170.0134830.4636490.9572320.000098201523069349300957.xml
.....................
912130056.00.7608280.0135140.4636450.9572230.000107201533099349301698.xml
237198698.00.8199530.0135110.4636460.9572310.000051201543119349300204.xml
462250934.00.1316360.0135350.4636450.9572310.000333201543139349302814.xml
231416559.00.5987600.0135260.4636460.9572300.000129201533219349301213.xml
223128187.00.6211040.0134810.4636500.9572310.000100201543179349306629.xml
731585237.00.8495840.0134860.4636470.9572310.000036201533209349316263.xml
581736427.00.8872240.0135600.4636420.9572310.000021201543079349301044.xml
470841633.00.7308780.0134930.4636660.9572340.000118201533139349300123.xml
943345498.00.5703440.0134810.4636550.9572320.000085201543179349305429.xml
264795329.00.9848820.0135410.4636450.9572310.000891201543179349308719.xml
262709818.01.0000000.0135370.4636450.9572310.000005201533179349302373.xml
431129770.00.8689050.0134910.4636480.9572310.001629201620149349300127.xml
592240502.00.8582380.0134840.4636500.9572320.000936201630129349300803.xml
61462359.00.8430570.0134840.4636460.9572310.000101201620119349300422.xml
630652760.00.9928920.0134960.4636460.9572310.000526201630129349300723.xml
341472960.01.0000000.0135370.4636450.9572310.000040201610139349300811.xml
800182020.00.5948380.0134870.4636450.9572310.000217201600199349300500.xml
462754433.00.0000000.0134790.4636460.9572320.000059201600399349300955.xml
742744885.00.9752190.0134790.4636470.9572310.000034201600369349301750.xml
471127421.00.1380380.0134790.4636520.9572390.000022201610399349300226.xml
742244155.00.9011250.0135110.4636450.9572310.003509201620359349300612.xml
940764760.00.9922750.0135250.4636450.9572310.000075201610429349302361.xml
840588263.00.9889250.0134830.4636460.9572320.000033201610369349301226.xml
20650275.00.6812100.0135080.4636460.9572310.000044201640359349300129.xml
942418861.00.9877890.0134800.4636560.9572330.000317201600429349301735.xml
223201871.00.9079860.0134790.4636550.9572320.000034201600359349302005.xml
650599763.00.9555810.0137230.4636440.9572310.000022201600419349301130.xml
470461460.00.4652350.0134800.4636500.9572320.000033201640359349301614.xml
990208381.00.8523440.0134820.4636480.9572320.000027201600429349302135.xml
510216586.00.5219380.0135220.4636470.9572310.707131201610229349300741.xml
\n", + "

38657 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " Program_Exp Liabilities_To_Asset Working_Capital \\\n", + "EIN \n", + "510311790.0 0.989619 0.013484 0.463650 \n", + "261460932.0 0.965378 0.013479 0.463657 \n", + "270609504.0 0.942276 0.013482 0.463647 \n", + "521548962.0 0.384526 0.013479 0.463740 \n", + "731653383.0 0.620560 0.013480 0.463649 \n", + "237324566.0 0.758902 0.013522 0.463645 \n", + "43259150.0 0.736312 0.013489 0.463646 \n", + "621273871.0 0.644822 0.013492 0.463690 \n", + "541897455.0 0.845659 0.013487 0.463646 \n", + "251869168.0 0.762357 0.013479 0.463672 \n", + "376046335.0 0.745386 0.013479 0.463696 \n", + "50454409.0 0.915261 0.013479 0.463649 \n", + "272364809.0 0.920307 0.013479 0.463646 \n", + "463812139.0 0.664088 0.013517 0.463645 \n", + "561797737.0 0.704623 0.013504 0.463645 \n", + "590638867.0 0.000000 0.013479 0.463646 \n", + "273314429.0 1.000000 0.013479 0.463645 \n", + "710801566.0 1.000000 0.013479 0.463645 \n", + "752204594.0 0.896895 0.013483 0.463647 \n", + "251374594.0 0.000000 0.013497 0.463646 \n", + "450448773.0 0.991205 0.013482 0.463646 \n", + "363789851.0 0.000000 0.013492 0.463647 \n", + "411712804.0 0.477697 0.013479 0.463648 \n", + "381557861.0 0.832159 0.013482 0.463654 \n", + "273741627.0 0.945120 0.013479 0.463648 \n", + "411495321.0 0.668161 0.013481 0.464095 \n", + "60666277.0 0.000000 0.013516 0.463647 \n", + "42616064.0 0.951218 0.013499 0.463646 \n", + "581651220.0 0.910347 0.013480 0.463648 \n", + "341496171.0 0.850617 0.013483 0.463649 \n", + "... ... ... ... 
\n", + "912130056.0 0.760828 0.013514 0.463645 \n", + "237198698.0 0.819953 0.013511 0.463646 \n", + "462250934.0 0.131636 0.013535 0.463645 \n", + "231416559.0 0.598760 0.013526 0.463646 \n", + "223128187.0 0.621104 0.013481 0.463650 \n", + "731585237.0 0.849584 0.013486 0.463647 \n", + "581736427.0 0.887224 0.013560 0.463642 \n", + "470841633.0 0.730878 0.013493 0.463666 \n", + "943345498.0 0.570344 0.013481 0.463655 \n", + "264795329.0 0.984882 0.013541 0.463645 \n", + "262709818.0 1.000000 0.013537 0.463645 \n", + "431129770.0 0.868905 0.013491 0.463648 \n", + "592240502.0 0.858238 0.013484 0.463650 \n", + "61462359.0 0.843057 0.013484 0.463646 \n", + "630652760.0 0.992892 0.013496 0.463646 \n", + "341472960.0 1.000000 0.013537 0.463645 \n", + "800182020.0 0.594838 0.013487 0.463645 \n", + "462754433.0 0.000000 0.013479 0.463646 \n", + "742744885.0 0.975219 0.013479 0.463647 \n", + "471127421.0 0.138038 0.013479 0.463652 \n", + "742244155.0 0.901125 0.013511 0.463645 \n", + "940764760.0 0.992275 0.013525 0.463645 \n", + "840588263.0 0.988925 0.013483 0.463646 \n", + "20650275.0 0.681210 0.013508 0.463646 \n", + "942418861.0 0.987789 0.013480 0.463656 \n", + "223201871.0 0.907986 0.013479 0.463655 \n", + "650599763.0 0.955581 0.013723 0.463644 \n", + "470461460.0 0.465235 0.013480 0.463650 \n", + "990208381.0 0.852344 0.013482 0.463648 \n", + "510216586.0 0.521938 0.013522 0.463647 \n", + "\n", + " Surplus_Margin Total_Expenses Filename \n", + "EIN \n", + "510311790.0 0.957231 0.000068 201523229349300327.xml \n", + "261460932.0 0.957231 0.000004 201543089349301829.xml \n", + "270609504.0 0.957232 0.000056 201533179349306298.xml \n", + "521548962.0 0.957234 0.000007 201533209349304768.xml \n", + "731653383.0 0.957232 0.000004 201533179349307343.xml \n", + "237324566.0 0.957231 0.000089 201533189349300608.xml \n", + "43259150.0 0.957231 0.000031 201523069349301367.xml \n", + "621273871.0 0.957233 0.000209 201533069349300963.xml \n", + "541897455.0 0.957231 
0.000068 201523099349300542.xml \n", + "251869168.0 0.957232 0.000015 201533099349301033.xml \n", + "376046335.0 0.957232 0.000005 201523169349304367.xml \n", + "50454409.0 0.957231 0.000082 201533099349301803.xml \n", + "272364809.0 0.957231 0.000044 201523069349300142.xml \n", + "463812139.0 0.957232 0.000020 201543039349301204.xml \n", + "561797737.0 0.957231 0.000031 201523089349301462.xml \n", + "590638867.0 0.957231 0.000112 201533069349300788.xml \n", + "273314429.0 0.957232 0.000004 201533079349300238.xml \n", + "710801566.0 0.957200 0.000168 201523209349314257.xml \n", + "752204594.0 0.957231 0.000569 201523209349311332.xml \n", + "251374594.0 0.957231 0.000062 201533179349302173.xml \n", + "450448773.0 0.957232 0.000027 201533179349307048.xml \n", + "363789851.0 0.957231 0.001072 201533209349302633.xml \n", + "411712804.0 0.957231 0.000119 201533099349301113.xml \n", + "381557861.0 0.957232 0.000200 201523039349300127.xml \n", + "273741627.0 0.957232 0.000181 201523079349301652.xml \n", + "411495321.0 0.957231 0.000162 201533039349300813.xml \n", + "60666277.0 0.957230 0.002874 201533139349300208.xml \n", + "42616064.0 0.957231 0.004864 201533069349301413.xml \n", + "581651220.0 0.957231 0.000048 201533079349300003.xml \n", + "341496171.0 0.957232 0.000098 201523069349300957.xml \n", + "... ... ... ... 
\n", + "912130056.0 0.957223 0.000107 201533099349301698.xml \n", + "237198698.0 0.957231 0.000051 201543119349300204.xml \n", + "462250934.0 0.957231 0.000333 201543139349302814.xml \n", + "231416559.0 0.957230 0.000129 201533219349301213.xml \n", + "223128187.0 0.957231 0.000100 201543179349306629.xml \n", + "731585237.0 0.957231 0.000036 201533209349316263.xml \n", + "581736427.0 0.957231 0.000021 201543079349301044.xml \n", + "470841633.0 0.957234 0.000118 201533139349300123.xml \n", + "943345498.0 0.957232 0.000085 201543179349305429.xml \n", + "264795329.0 0.957231 0.000891 201543179349308719.xml \n", + "262709818.0 0.957231 0.000005 201533179349302373.xml \n", + "431129770.0 0.957231 0.001629 201620149349300127.xml \n", + "592240502.0 0.957232 0.000936 201630129349300803.xml \n", + "61462359.0 0.957231 0.000101 201620119349300422.xml \n", + "630652760.0 0.957231 0.000526 201630129349300723.xml \n", + "341472960.0 0.957231 0.000040 201610139349300811.xml \n", + "800182020.0 0.957231 0.000217 201600199349300500.xml \n", + "462754433.0 0.957232 0.000059 201600399349300955.xml \n", + "742744885.0 0.957231 0.000034 201600369349301750.xml \n", + "471127421.0 0.957239 0.000022 201610399349300226.xml \n", + "742244155.0 0.957231 0.003509 201620359349300612.xml \n", + "940764760.0 0.957231 0.000075 201610429349302361.xml \n", + "840588263.0 0.957232 0.000033 201610369349301226.xml \n", + "20650275.0 0.957231 0.000044 201640359349300129.xml \n", + "942418861.0 0.957233 0.000317 201600429349301735.xml \n", + "223201871.0 0.957232 0.000034 201600359349302005.xml \n", + "650599763.0 0.957231 0.000022 201600419349301130.xml \n", + "470461460.0 0.957232 0.000033 201640359349301614.xml \n", + "990208381.0 0.957232 0.000027 201600429349302135.xml \n", + "510216586.0 0.957231 0.707131 201610229349300741.xml \n", + "\n", + "[38657 rows x 6 columns]" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"df.reset_index(drop=True,inplace=True)\n", + "norm_df=df.copy()\n", + "\n", + "norm_df=norm_df[['Program_Exp','Liabilities_To_Asset','Working_Capital','Surplus_Margin','Total_Expenses']]\n", + "\n", + "from sklearn import preprocessing\n", + "\n", + "x = norm_df.values #returns a numpy array\n", + "min_max_scaler = preprocessing.MinMaxScaler()\n", + "x_scaled = min_max_scaler.fit_transform(x)\n", + "norm_df = pd.DataFrame(x_scaled)\n", + "norm_df[\"Filename\"]=df['Filename']\n", + "\n", + "norm_df[\"EIN\"]=df['EIN']\n", + "norm_df.columns=['Program_Exp','Liabilities_To_Asset','Working_Capital','Surplus_Margin','Total_Expenses','Filename','EIN']\n", + "norm_df.set_index('EIN',inplace=True)\n", + "norm_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Program_Exp Liabilities_To_Asset Working_Capital Surplus_Margin \\\n", + "0 0.684058 0.013484 0.463650 0.957231 \n", + "1 0.667302 0.013479 0.463657 0.957231 \n", + "2 0.651333 0.013482 0.463647 0.957232 \n", + "3 0.265798 0.013479 0.463740 0.957234 \n", + "4 0.428952 0.013480 0.463649 0.957232 \n", + "5 0.524579 0.013522 0.463645 0.957231 \n", + "6 0.508964 0.013489 0.463646 0.957231 \n", + "7 0.445723 0.013492 0.463690 0.957233 \n", + "8 0.584548 0.013487 0.463646 0.957231 \n", + "9 0.526967 0.013479 0.463672 0.957232 \n", + "10 0.515236 0.013479 0.463696 0.957232 \n", + "11 0.632659 0.013479 0.463649 0.957231 \n", + "12 0.636148 0.013479 0.463646 0.957231 \n", + "13 0.459040 0.013517 0.463645 0.957232 \n", + "14 0.487059 0.013504 0.463645 0.957231 \n", + "15 0.000000 0.013479 0.463646 0.957231 \n", + "16 0.691234 0.013479 0.463645 0.957232 \n", + "17 0.691234 0.013479 0.463645 0.957200 \n", + "18 0.619964 0.013483 0.463647 0.957231 \n", + "19 0.000000 0.013497 0.463646 0.957231 \n", + "20 0.685154 0.013482 0.463646 0.957232 \n", + "21 0.000000 0.013492 0.463647 0.957231 \n", + 
"22 0.330200 0.013479 0.463648 0.957231 \n", + "23 0.575217 0.013482 0.463654 0.957232 \n", + "24 0.653299 0.013479 0.463648 0.957232 \n", + "25 0.461856 0.013481 0.464095 0.957231 \n", + "26 0.000000 0.013516 0.463647 0.957230 \n", + "27 0.657514 0.013499 0.463646 0.957231 \n", + "28 0.629263 0.013480 0.463648 0.957231 \n", + "29 0.587975 0.013483 0.463649 0.957232 \n", + "... ... ... ... ... \n", + "38639 0.525910 0.013514 0.463645 0.957223 \n", + "38640 0.566779 0.013511 0.463646 0.957231 \n", + "38641 0.090991 0.013535 0.463645 0.957231 \n", + "38642 0.413883 0.013526 0.463646 0.957230 \n", + "38643 0.429328 0.013481 0.463650 0.957231 \n", + "38644 0.587261 0.013486 0.463647 0.957231 \n", + "38645 0.613279 0.013560 0.463642 0.957231 \n", + "38646 0.505208 0.013493 0.463666 0.957234 \n", + "38647 0.394241 0.013481 0.463655 0.957232 \n", + "38648 0.680784 0.013541 0.463645 0.957231 \n", + "38649 0.691234 0.013537 0.463645 0.957231 \n", + "38650 0.600616 0.013491 0.463648 0.957231 \n", + "38651 0.593243 0.013484 0.463650 0.957232 \n", + "38652 0.582750 0.013484 0.463646 0.957231 \n", + "38653 0.686320 0.013496 0.463646 0.957231 \n", + "38654 0.691234 0.013537 0.463645 0.957231 \n", + "38655 0.411172 0.013487 0.463645 0.957231 \n", + "38656 0.000000 0.013479 0.463646 0.957232 \n", + "38657 0.674104 0.013479 0.463647 0.957231 \n", + "38658 0.095417 0.013479 0.463652 0.957239 \n", + "38659 0.622888 0.013511 0.463645 0.957231 \n", + "38660 0.685894 0.013525 0.463645 0.957231 \n", + "38661 0.683578 0.013483 0.463646 0.957232 \n", + "38662 0.470875 0.013508 0.463646 0.957231 \n", + "38663 0.682793 0.013480 0.463656 0.957233 \n", + "38664 0.627631 0.013479 0.463655 0.957232 \n", + "38665 0.660530 0.013723 0.463644 0.957231 \n", + "38666 0.321586 0.013480 0.463650 0.957232 \n", + "38667 0.589169 0.013482 0.463648 0.957232 \n", + "38668 0.360782 0.013522 0.463647 0.957231 \n", + "\n", + " Total_Expenses Filename \n", + "0 0.000068 201523229349300327.xml \n", + "1 0.000004 
201543089349301829.xml \n", + "2 0.000056 201533179349306298.xml \n", + "3 0.000007 201533209349304768.xml \n", + "4 0.000004 201533179349307343.xml \n", + "5 0.000089 201533189349300608.xml \n", + "6 0.000031 201523069349301367.xml \n", + "7 0.000209 201533069349300963.xml \n", + "8 0.000068 201523099349300542.xml \n", + "9 0.000015 201533099349301033.xml \n", + "10 0.000005 201523169349304367.xml \n", + "11 0.000082 201533099349301803.xml \n", + "12 0.000044 201523069349300142.xml \n", + "13 0.000020 201543039349301204.xml \n", + "14 0.000031 201523089349301462.xml \n", + "15 0.000112 201533069349300788.xml \n", + "16 0.000004 201533079349300238.xml \n", + "17 0.000168 201523209349314257.xml \n", + "18 0.000569 201523209349311332.xml \n", + "19 0.000062 201533179349302173.xml \n", + "20 0.000027 201533179349307048.xml \n", + "21 0.001072 201533209349302633.xml \n", + "22 0.000119 201533099349301113.xml \n", + "23 0.000200 201523039349300127.xml \n", + "24 0.000181 201523079349301652.xml \n", + "25 0.000162 201533039349300813.xml \n", + "26 0.002874 201533139349300208.xml \n", + "27 0.004864 201533069349301413.xml \n", + "28 0.000048 201533079349300003.xml \n", + "29 0.000098 201523069349300957.xml \n", + "... ... ... 
\n", + "38639 0.000107 201533099349301698.xml \n", + "38640 0.000051 201543119349300204.xml \n", + "38641 0.000333 201543139349302814.xml \n", + "38642 0.000129 201533219349301213.xml \n", + "38643 0.000100 201543179349306629.xml \n", + "38644 0.000036 201533209349316263.xml \n", + "38645 0.000021 201543079349301044.xml \n", + "38646 0.000118 201533139349300123.xml \n", + "38647 0.000085 201543179349305429.xml \n", + "38648 0.000891 201543179349308719.xml \n", + "38649 0.000005 201533179349302373.xml \n", + "38650 0.001629 201620149349300127.xml \n", + "38651 0.000936 201630129349300803.xml \n", + "38652 0.000101 201620119349300422.xml \n", + "38653 0.000526 201630129349300723.xml \n", + "38654 0.000040 201610139349300811.xml \n", + "38655 0.000217 201600199349300500.xml \n", + "38656 0.000059 201600399349300955.xml \n", + "38657 0.000034 201600369349301750.xml \n", + "38658 0.000022 201610399349300226.xml \n", + "38659 0.003509 201620359349300612.xml \n", + "38660 0.000075 201610429349302361.xml \n", + "38661 0.000033 201610369349301226.xml \n", + "38662 0.000044 201640359349300129.xml \n", + "38663 0.000317 201600429349301735.xml \n", + "38664 0.000034 201600359349302005.xml \n", + "38665 0.000022 201600419349301130.xml \n", + "38666 0.000033 201640359349301614.xml \n", + "38667 0.000027 201600429349302135.xml \n", + "38668 0.707131 201610229349300741.xml \n", + "\n", + "[38669 rows x 6 columns]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Anaconda3\\lib\\site-packages\\ipykernel\\__main__.py:6: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "C:\\Anaconda3\\lib\\site-packages\\ipykernel\\__main__.py:10: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy 
of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n" + ] + } + ], + "source": [ + "df2 = df.copy()\n", + "df2.reset_index(inplace=True, drop = True)\n", + "print(norm_df2)\n", + "Y_class_df = pd.DataFrame()\n", + "X_class_df=norm_df.loc[lst_temp]\n", + "X_class_df['Efficiency'] = 1\n", + "\n", + "\n", + "Y_class_df['Efficiency'] = X_class_df['Efficiency'] \n", + "X_class_df.drop('Efficiency', axis=1, inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "new_df=norm_df[['Program_Exp','Liabilities_To_Asset','Working_Capital','Surplus_Margin']]\n", + "new_df.reset_index(inplace=True,drop=True)\n", + "X_class_df=X_class_df[['Program_Exp','Liabilities_To_Asset','Working_Capital','Surplus_Margin']]\n", + "# X_class_df=X_class_df.drop(X_class_df.index[2]) #OUTLIER REMOVER\n", + "X_class_df.reset_index(inplace=True,drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "from sklearn.cluster import DBSCAN\n", + "from sklearn import metrics\n", + "from sklearn.datasets.samples_generator import make_blobs\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.decomposition import TruncatedSVD\n", + "\n", + "svd = TruncatedSVD(n_components=2, n_iter=7)\n", + "X = svd.fit_transform(new_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "db = DBSCAN(eps=0.1, min_samples=10).fit(X)\n", + "core_samples_mask = np.zeros_like(db.labels_, dtype=bool)\n", + "core_samples_mask[db.core_sample_indices_] = True\n", + "labels = db.labels_\n", + "n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 79, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAhcAAAFyCAYAAABGCPg8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzs3Xl8lNX1+PHPIewYIOxbYrAiorUi+brLGpCvCggIsqjw\nU79VShWkpRVE61KsWhWEKoKCgloRoaKgtcgOWmUJixviBiRE9gxhgLAk3N8fZ4ZMhkkySWYIIef9\neuUV8szz3OfOJGRO7j33XHHOYYwxxhgTKRVKuwPGGGOMObtYcGGMMcaYiLLgwhhjjDERZcGFMcYY\nYyLKggtjjDHGRJQFF8YYY4yJKAsujDHGGBNRFlwYY4wxJqIsuDDGGGNMRFlwYcosEVkmIktLux+R\nJCLnisgJERlU2n0pqoC+/6G0+xIOEakhIlNFZIev3+NK2N4yEVkSqf4ZU5ZZcGEiTkQG+35Zh/rI\nEZEritBWKxF5VEQSQjzsgBOR63n4ROR3IjK4NO5tImYMMAh4CbgdeLOE7UV1LwURGS0iN0fzHpEi\nIo1E5GkRWSIiB3z/99uVdr/M6VOxtDtgzloOeATYGuKxH4vQzkXAo8BSIDXosS7F6llkDAX2ADNK\nsQ+mZDoCXzjnxpZ2R8L0EDAb+KC0OxKGlsCfgB+AL4GrS7c75nSz4MJE03+cc+tK2IaQz1+Ezrns\nErZtyiARqe6cOxyBphoA30SgnTJLRKoAx1zkd7BcC9R1zu0XkVuw4KLcsWkRU6pEpL+IrPUNnWaK\nyJcicr/vscHAu75TlwVMq7TzPZ5njltE2vvO6eubStnua3e2iMSKSGUReUFEdomIV0ReE5FKQf25\nU0QW+845IiLfiMiQoHO2ABcDHQKmewL7Uct3n1RfGz+IyJ9FRILaqSUi00Vkv4h4ROR1oHaYr5t/\n6ukaERknIrtF5KCIvCcidYPOPSEifwnRxlYReS1Em9eKyERfmx4RmSwiFX39fUNEMnwfzxTQvwd8\n7R/2fZ8uDnFOSxGZIyL7RCRLRNaISPd8nmc7EZkkIruAtEJem/oiMk1Edvra3SABOSz+nxMgEegW\n8HMVauotsN3bRWSViBzyPf/lItK5gPP/n6/thKDj/p/TdgHHzheRf4nmf2SJSJqIzBSRWN/jJ4Dq\ngL/NE0Hfuya+n+edvp+5r0Xkznzu209ExorIduAQEOv7/j4qIt/77r9XRFaKSHLA9RV937NGBb1O\nAM65Q865/YWdZ85eNnJhoqlW8Bsd4JxzGQAi0gV4G1gI/Nn3eCvgWuAfwApgInA/MBb4znfOJn9b\n+dx3NHAYeAo433f9cTQ/ozY6zXIVMBj42de23xDga3ToORvoDkwSEXHOvew7ZzjwIuD1XSvALt9z\nqubrd2NgMvpGeI2vL42AwGTHeb7HXvY9t17oNEtR/or8B5ABPIa+WY7w9W1AGNfmd59/ADuAv6Cv\n02+B/b6+bkNf3xuBkSLylXPuraDrBwPn+PpRFX29FovIJc65PQC+YONTYDv62hwCbgXeF5Hezrng\nof9JwG7gcaBGfk9IRKoCy4HzfM9jK9AXmC4itZxz/wC+RXMsXkC/P8/7Lt9TQLuPoj83n6HTfceA\nK4FOwKJ8LnPk/xqfPC4a4H4CVEJ/3ncCTYFu6M+r19ffacAq4BXfpT/5rm/gO57ju34vcAMwTURi\nnXMTg+79CHAUeBao7HsujwOjfG2vAWoC/wO0ARb7rmuK/t+bDtyVz/MyRjnn7MM+IvqBvrmcyOfj\ncMB54wFPIW3dgv
7SbBfisaXAkoCv2/vusRGICTj+T18bHwZd/xnwc9CxKiHu8zHwQ9CxrwLvHXD8\nYeAAcF7Q8b+hv8Sb+r6+2dfXPwScI+gbYw4wKMzX+D9Bx5/33Sc24NgJ4C8h2tgCvBaizY9CvE45\nwIsBxyqgOTCBr/+5vusPAo0Cjl/uO/5cwLFFwHqgYtC9PgW+C9GnZYCE8bM33NfX/gHHYnzPIROo\nEfT854XR5q/QQHN2IecF/zwO9vUlIei89oE/08ClvufYq5D2vYHfr4DjU9EgrXbQ8bfRwLNK0P+P\nH4DKQeeuL+y18H1/c4Bphb1mQdfl+3/YPs7eD5sWMdHigN8BnYM+bgg4Zz9QQ0S6RvjeM5xzOQFf\nr/J9fi3ovFVAvIic/H/gnDvq/7eI1PSNvKwAzvMPUReiD7ASyBSRuv4P9K+/ioB/KPxGdDRlcsC9\nHfrXthAeR+5fsX4r0TfTc8NsI1SboV4nAo87506g8+rnhWhjrnNuZ8C5a3xt3AggInFoMuVsfKNb\nAa/TJ0ALEWkc1KdXfa9PYW4Adjrn3gm4v/8v+nPQN9ii6oV+T54oxrXhyPR9/l/fyFdR9QbmAzEh\nXsta6OhDoOnOuWNBx/YDF4vI+fndxDm3zTkX45y7uxh9NOWMTYuYaFrjCk7onIQOWf9bRH5Bfxm+\n65xbUML7Bs/JZxZwvAL6C9gDICLXokPEV6Fz3H7Od563kHu3AC4h9BC7Q5MIARKAHe7UxMTNhbQf\nLPg5eXyf44rYTqDgVTkFvX6h7hNqNdD36PcadKpKgL+Sd0rKz/867Qg4tjX/7uZxLvqXebBNvnsW\nJ+g6D/2Lf1NhJxaHc26riDyPTpndLiIr0Smzt5xzBwq6VkTqo1Mn9wD3hmqe3J85v60hzvsL8D7w\nvYh8DfwHeNM591VRnosxfhZcmFLjnNsjIq2BruhfnDcAd4rIDOfcnQVfXaCcIh4XABE5Dx2u34Tm\nLqShUww3AQ8QXgJ0BTSH5BlCj0B8H0YbRVHgcypETBHbDHU83FGWQP7X8Tkgv0AyOEDJKsZ9Slt+\nIy2nvO7OuT+JyHR0uux6dKRllIhc5Zz7pYB7+F/Lt8h/WfSXQV+f8lo651aKyK8C7n83MEJE7nXO\nBY9kGVMoCy5MqXK6nPQj3wci8jJwj4j81Tn3M1EuTBSkO5rg1t05l+4/GJgxHyC/fv0EnOOcK6xy\n6Dagk5y6rPLConQ4TB6CVqH4kggbhz69xFqEOHYBuX8x/+z7fNw5F+mKltvQkaNgrQIeL6qf0Dfx\nizj1jbog/lGk2uQdDUoMdbJz7ht0aezfROQq4L9ogrF/pU+on7k96GhaTElfS6erO2YAM0SkOjrF\n9hinTpMZUyjLuTClRkTqhDjsH4at4vt8CP3rOKwlmiXk/8v85P8LEakF/L8Q5x7Kp0/vAleLyPXB\nD4gu5fS3/W90dcDvAh6vgK5siXRA9RO5uR5+95L/yEVJ9RSRJv4vRCuyXok+Z5yuGFkG3BtqWaOI\n1CvBvf8NNBKRfgHtxaCvqxdNmC2q99HvyV9EpCgjNT+hP7uBS04roFMYBByL9fUx0DfoVEyVgGOn\n/Mz5cl/+BdwioZf7hvVaBv9f9AW8PwbevyhLUY2xkQsTLQLcKCKtQjz2mXNuKzDV90ttCZrtngjc\nB6x3zvnntzegb/oPikhtdAndYufc3mL0pzCfoEmWH4rIFCAW+D90mWnwL9QUYIiIjEF/Ce/2jVY8\nC/TwtTHdd14N4Ddo4l0imsE/H13B8LSINEeXR/b23bOkzyn4+FRgsojMQadsLkWHvkPlhRRnmiPY\nj8CnvlEo/1LUPehr4/d79C/jr0TkVXQ0oyFabKkpcFkx+/QKGjhNF5H/IXcp6tXAcOfcoaI+Gefc\nTyLyJLoSaKWIvIf+HF4OpDvnxuRz3bci8gX6Pa6Lft/7c+ofdZ2AF0VkNjptVhEt
S56NBg5+KUBn\nERkB/AJscc6tRpeQdgBW+V7Lb4E6QJKv7XACjG9FZJnvHhm+59YHnZ7xK9JSVBF5GA3KLka/h4NE\npC2Ac+7JMPpkyrLSXq5iH2ffB7lL8PL7GOQ7rxe6zHMHOg+8Bd3noUFQe3ehSXrHyLuEbykaaPjP\n8y/x651Pf9oEHX/Ud7xOwLGb0GV5h9C/PP+IjlzkWVKIJsnNQ7Psc8i7BLE6mqi42fe8dqFvpA+Q\nd4lsbfQXtQf9hf46GoSEuxQ11HPKs8zRd0zQpbC70L/ePwKao2/o04rzOvmOvw5kBnztX6o4wvdc\nt6L1RpYCvw7xHBJ9baQDR9Cpgw8IWJKZX58KeW3qoQHVLt/rvwG4I8R5PwMfFPHneq3vOe1Fg+JO\nAY/n+XkMeI4LfNf8gq446UTen+NE4FU0sDiEBmKLgA5BbV3gu8dB3/WvBT3nib7X/IjvNf0EuKuw\n/x++x0YDnwP7fO1/AzwY9PNapKWo6MhLqP//2afz95F9lM6H+H4IjDHGGGMiIuo5FyLyexHZ4isp\n+4WIXF7I+beJlus9JCK/iJbxDTU3b4wxxpgzUFSDC19S1fPosOplaOXEBfklGflqDMxAhwgvQuf8\nruDUQkHGGGOMOUNFdVrEl8y0yjk33Pe1oLUDJjrn/h7i/D8CQ5xzLQKO3Qf82TlX4KZCxhhjjDkz\nRG3kwreOPoncTW9wGsksIv/tdz9HyzHf4GujIZrp/VG0+mmMMcaYyIrmUtR66Dr6XUHHdwEtQ13g\nnPuviNwOzPLtblgRzci/L7+b+JZ4dSU3S9oYY4wx4amKb1WTc25fpBo9o+pciMhFwAS0KtwnaAXB\n54ApaL2BULqiu14aY4wxpnhuQ3fSjYhoBhd70TXNDYOONwR2nno6oMVgPnPOjfN9/bWIDEUL14xx\nzgWPgoCvpPBbb71Fq1ah6jWdPUaMGMH48eNLuxunRXl5rqfreTrnKFpxyciy7+fZxZ7n2WPTpk3c\nfvvtEP7mgGGJWnDhnDsuIilAMjq14U/oTCZv1bdA1dFCSYFOoFXe8vvNeASgVatWtGkTvLPw2aVW\nrVpn/XP0Ky/PNZrP0+v1MmbMGObPn8/x48epVKkS3bt358knnyQ2tiiFQEvOvp9nF3ueZ6WIphVE\ne1pkHFqGNwVYjVbuq45WJUREngKaOOcG+86fD7wiIkPQqnZNgPHoipP8RjuMMUG8Xi9XX301mzZt\n4sSJEyePv/TSSyxZsoTPP//8tAcYxpjyI6p1Lpxz7wIj0ZK369HSxl2dblwEul9DfMD5M4A/oPsO\nfAXMQmvZ3xLNfhpzthkzZswpgQXAiRMn2LRpEw8//HAp9cwYUx5EvUKnc26Scy7ROVfNOXe1c25t\nwGN3Ouc6BZ3/knPuEufcOc65Zs65wc65HdHupzFlQbh1aebPn39KYOF34sQJ5s2bF8luGWNMHrbl\nehkyYMCA0u7CaVNenms4z9Pr9TJs2DCaN29OfHw8zZs3Z9iwYXi93pDnO+c4fvx4gW0eP3487EAl\nEuz7eXax52kKU+Y3LhORNkBKSkpKeUq8MeVEfrkTFSpUoFWrVvnmTjRv3pytW7fm225iYiJbtmyJ\nRpeNMWXIunXrSEpKAkhyzq2LVLs2cmHMGay4uRPdu3enQoXQ/70rVKhAjx49It5XY4zxs+DCmDNY\ncXMnnnzySVq1anVKgOEf8Rg7dmzE+2qMMX4WXBhzhipJ7kRsbCyff/459913H4mJiTRt2pTExETu\nu+8+W4ZqjIm6M6r8tzEml4hQqVKlAs+pVKlSvpU3Y2NjmTBhAhMmTCj1Cp3GmPLFRi6MOYNFKnfC\nAgtjzOlkwYUxZzDLnTDGlEUWXBhTiNJcrm25E8aYsshyLkyZ4PF4eHPWLL7buhWP10tcbCwXJiZy\nR79+xMXFRfx+Xq+XMU89xfzlyzlepQqVjh6l
e/v2PDl69Gl/Q7fcCWNMWWPBhTmjpaSk8OiECWxI\nT2dn27bk/OpXUL06HD5MTHo6f+/Th8uaNeOxYcP8hWBKzOv1cnW3bmy66SZOjB0LIuAcL61Zw5Ju\n3fj8ww9LbcTAAgtjTFlgwYU5I2VnZ9P/3ntZ5vWyr3t3iI8/5ZycFi1I79CB9LQ0Pn/mGTrExvLO\nlClUrFiyH+sxTz2lgcUVV+QeFOHEFVewCXj46aeZ8OSTJbqHMcaczSznwpxxsrOzSe7Th48SE9k3\ndGjIwCKP+Hj2DR3KR+eeS+e+fcnOzi7R/ecvX86Jyy8P+diJyy9n3vLlJWrfGGPOdjZyYU6bcPMm\n+t97L6uTkjjStm2R2j/Srh2rgAFDhjB76tRi9dE5x/EqVXQqJBQRjleubLkPxhhTAAsuTNT4g4mV\na9bw2fr1ZB45QlaLFriOHeGSS0LmTTjnWOb1Fjmw8DvSrh1LJ00iJSWlWDkYIkKlo0fBudABhnNU\nOnrUAgtjjCmABRcmIrZs2cKw0aNZu2kTmQcPcuzYMcjJIad3b7jqKujUCQ4fhu3bYc4cqF0buncn\np0OHPHkTlTZvZt/IkSXqy75u3Xh84kTmzZhRrOu7t2/PS2vW5M258KmwZg09OnQoUf+MMeZsZ8GF\nKbLA6Y01n3/O+p9+Iqd2bejdG5KTT67mIC0N5s6FxYuhVy+4/npo0QI6dsx97KOPYPhwzZu44w54\n4onCcywKk5DAurQ09u/fT+3atYt8+ZOjR7OkWzc2oTkW/tUiFdasodVHHzH2ww9L1j9jjDnLSWkW\nCIoEEWkDpKSkpNCmTZvS7s5Z6fLLL2ft2rVwzjn64Z8SOHgQvF547jkoaAoiLQ3efhu2bIGJE6Fy\n5dzHli2D5cvh4Yfh44+hRg0NPkooZulSxjVuzLAhQ4p1vdfr5eGnn2be8uUcr1yZSseO0aN9e8aO\nGmWFq4wxZ41169b5p5CTnHPrItWujVyYU3g8HqZMn87oUaPg2DFo1AguvlhHJuLj845MvPcePP88\n7NgBo0fr6ESw+Hh48EEdwbj3XpgyJTfA8E8xTJwItWpBMXMtguU0bcrmrVuLfX1sbCwTnnySCWDJ\nm8YYU0QWXJiTUlJSeODxx/nvTz9x4ttvoX59aNMGbrst9FRFixaaS5GWBv/8J0ydCk89BQsW5B2d\n8EtO1s/Dh8PLL+ce79ABvvwSdu3SwCUSqldn/4EDEWnKAgtjjCkaq3NRznk8Hsa/9BLnXXEFV4we\nzafduuUGFkOGwKhRYdWZYNQoHZVo0AC6doWvvgp9bnIyJCbCJ5/kPd6zp45+HD4ckefF4cPUrlkz\nMm0ZY4wpEhu5KKf8ZbXXp6Xxi8cDN9+suQ63354bWHTqVLRG/SMTU6bAM8/octORIyEmJu95AwbA\n3/6WdwolIUGnYLZv1xGREopJT6dlQkKJ2zHGGFN0NnJRzmRnZ9Pn7rvp+swzfNSlC7/ExupKjo4d\nYcYMSE/XqZCiBhZ+ycl6fXo6VKqkoxnffpv3nIQEfWznzrzHu3fX1SMR0GjlSgb17x+RtowxxhSN\nBRflyClltQ8fhooVoX17PWHRIk3evO22kt1o4EBtJyVF7zF2rB67+27Nt/jhB50GmT4973UtWujo\nRVpaye6fmkqb+PhiLUM1xhhTcjYtUo6cUlZ73jzw/3X/ww86JXHxxRGpM0GdOhooDBwIzZrlrjBJ\nTYUXXoADB+Do0bzXVa+uORtz58KwYcW+fd0PP+SxUaNK9hyMMcYUm41clBNr167NW1bb64XMzNxA\n4sUXITZWl5tGQu/ekJ2t0y0tWkDTpvo5ORleeklzLtq0gVtvzU3uPHxYz8vO1toXxVB1xQo61qxp\nNU+MMaYU2chFOfHYxIm6dbnf8uW5CZigb+znnFPyUQu/+HjIySn48VGjtPbFK69ocHPuuTpFM2iQ\nTqUcOwZd
uoR9y6orVnDl+vXMnD07Ak/AGGNMcdnIRTng8XjYkJ6eN3DYvVunK/xycrTyZgTrTBBO\n9dfkZLjnHj2/enXN+xg8GK64gsbz51N30qTCczBSU6k7aRLdUlNZNHs2FStazGyMMaUp6r+FReT3\nwEigEbARuN85t6aA8ysDjwK3+a75BXjCOTc92n09W705axY7gytfZmXlDSRiYjQYiGCdiXy3LQ+W\nnAxr1sChQ3pd7drwzjvsyMxk1E038c2iRaxLS2Nn27bkNG16Mn8jJj2dRitX0iYhgcdGjbKpEGOM\nOUNENbgQkX7A88A9wGpgBLBARC5wzu3N57LZQH3gTuAnoDE2wlIi323dSs6vfpX3YLVqeQOJ6tW1\nQmZaWkTqTJCWdmp9i4IMHKj7i/Tvr8tRzz8ftm3j6TfegEOH6NimDX9u3JjNW7ey/8ABatesScuE\nBO7417+Ii4sreX+NMcZETLRHLkYAU5xzbwCIyBDgJuAu4O/BJ4vI/wJtgfOcc/t9h1Oj3Meznsfr\nPXW6o0GDvAWr7rtPpyfee6/4NS4Cvfce/OEP4Z+fkKAJpYsWaTJoQkKeFSZL33uPpY89Ru0TJxhw\n110aWPTrZ4GFMcacgaI2IiAilYAkYLH/mNMtWBcBV+dzWXdgLfCgiGwXkc0i8qyIVI1WP8uDuNjY\nU6c72rfXZEq/Fi00B2PfvojUmcDj0QqdRdG7twY9ycmhV5g8+yz7L72Ul6dNY/i4cVzSpw/dBw8m\nJSWlZP01xhgTUdGcbqgHxAC7go7vQnMpQjkPHbm4GOgJDAf6AC9FqY/lwoWJicSkp+c9GBuru5AG\nBhKdO2vVzH/+s2Q3fPttuPzyol8XHw8ZGaEf83ph40Zo3BhatgSvl/SUFD5cv57rn36aPnffTXZ2\ndsn6bYwxJiLOtLT6CsAJYKBz7iCAiPwBmC0iQ51zR/O7cMSIEdSqVSvPsQEDBjBgwIBo9rdMuKNf\nP/7epw/p/u3N/Xr0yFuwavBgWLgQ1q2DJUuKNz2yeDFs2ADvvFP0a6tXP7Ww1ubNWuwrM1NHMNq2\n1Y3RDh+Gbdtg7lwyVq7kXxkZtExJYfPatbZaxBhjQpg5cyYzZ87McywzMzMq94rmb+G9QA7QMOh4\nQ2DnqacDsANI9wcWPpsAAZqhCZ4hjR8/3lYL5CMuLo7WTZuSnpaWdzlqy5aaPLl8eW4J8FdfhRtv\n1M3HnMtbC6Mw/poV06YVr6OHD0OVKvrvnByYMEHLk/fvn/+W75076+jLm2/y88aNVKpdm45XXMGS\nJUuK1wdjjDlLhfqDe926dSQlJUX8XlGbFnHOHQdSgJPvTiIivq//m89lnwFNRCQw+7AlOpqxPUpd\nLRceHz6cuvPnn/rA8OGwbJl+gK4i+fe/tQ7GlCnw9NNh1Zng6ac1MJk2TYtxFUdampYNz8mBv/4V\nkpJ0VCWcLd8fekh3cm3YkKWbNyMNGnDR5ZczcfJkPB5P8fpjjDGmWMSFU+iouI2L3ApMB4aQuxS1\nD3Chc26PiDwFNHHODfadXwP4FvgCeAxdkvoqsNQ5NySfe7QBUlJSUmzkohB97r6bjxITc0uA+wWO\nEvTqlftmPmOGbi7WqJG+6ffurY/5V3GkpemqEI9HcyxGjChZB++/H8aM0ZyNpKTc0ZSiWLYMPv5Y\nl8FWraorYoB2F13EuEceiUqEXhQej4c3Z83iu61b8Xi9xMXGcmFioq18McaUioCRiyTn3LpItRvV\nyWnn3LsiUg94Ap0O2QB0dc7t8Z3SCIgPOP+QiHQB/gGsAfYBs4BHotnP8uKdKVPo3Lcvq5zjSLt2\nuQ/ExOiy0c2bYdYsDRaSk+Hqq/XzkiXw+us6QlGjBlSooFMmhw7p3iHZ2dCnT8k6l5oKx49rbkXg\nTq1F1aEDfPkl1K2roy+33AKvv86KNWv4n27dOL9xYzatXn3a8zJSUlJ4dM
IENqSnazGwX/0qTzGw\nv/fpw2XNmvHYsGGlHgAZY0xJRXXk4nSwkYuiyc7Opv+997LM69W9RkJNOXi9mofx889U+P578Hg4\nceCABhFVq+rUSZMmcNllWnjrpptgwYIS7WTKM8/oaMX69fnnWIQrNRXefVf/XaeOjrB8/70GLVlZ\ncPw4Dw0dypNPPln8e4QprNfbLy2NuvPn0yE2lnemTLHEVGNM1EVr5MIqX5YzFStWZM60aSx48EG6\nL1pE0yeeIGbpUn3z3b4dvv+emLVrabpuHd1r1mTNG2+Qk56O83pxWVn886WXYMcO2LIFvvgCfvxR\nryvBTqYsXgxbt8KVV+bdqbW4srJg0ybYs0c3Q7v5ZrjhBmjdWpNYa9fmb6++itSsSdeuXUt2rwJk\nZ2eT3KcPHyUmsm/o0LByR/YNHcpH555L5759bWmtMabMspGLcs6fA7A5NTVvWe1CcgA8Hg9NmjTh\nSGwsVK6sy1j/+1+dzghe8lqQxYvhrbc0efSTT3TapWPH4j2Z4NyRw4fzLmNt1iw3X2T7dnj/fZ06\nycqiFrBly5aI5j3km+MShqorVtAtNZXZU6dGrD/GGBOsTOZcmDNfXFwcw4aEzJUt9LqsrCwAbr75\nZuZNngyXXgqffqo5D4GJoaGkpsLMmTpiMWWKBii7d2sdi+LwrzDp2BGuuy68ZawdO+qUyYwZZH71\nFXWaN6eyc/zn/ffpWNwAx2ft2rUs83qLFVgAHGnXjqWTJpGSkmI5GMaYMseCC1NiH3zwAQC33nor\ns5cv19GB7dt1V9SuXfOOGKSm6gqTzEy480548MHchoJ3ai2KCRNyAwt/kBFOUmh8vG6YtnAhvPsu\nx+Li6PS738G+fSycOZPOnTsXqzuPTZyoORYlsK9bNx6fOJF5M2aUqB1jjDndLLgwEfOuL4ly8ODB\nvDF/vhbEmjw5tzBW5cpaA2PECJ2SaNkybwPBO7WGa/Pm3BUm48aFH1gE6tJFg6F33oEjR6BGDbrc\nfjtkZfHcX/7CH//4x7Cb8ng8bEhPL3nuSEIC69LS2L9/P7Vr1y5ZW8YYcxpZQqeJuBkzZuAyMhh7\n3326vFQEGjbUbdV//3sNIlq31mWvgfw7tRbVvHk6DRMYZBRH585w8cVajGvMGN0l9oILGDlxIlKv\nHp3CLIf+5qxZ7Czu9E6QnW3b8kZxSqkbY0wpsuDCRM2YMWNwHg8/L1/ORaA7mz7yCPzpT5r8uWtX\nbmVQOHWn1nB4vbkrTPxBRknccouugmnRQvdWefZZGDsWrr2Wpdu3I/Xrc3GbNgVW/fxu61ZymjYt\nWT98cpo2ZXNqakTaMsaY08WCCxN1zZs355u1a3EZGTwwcKBuTvbVV7oz6yef5AYYoXZqLczy5boS\nJDDIKImEBC0idjBge5v4eJ3KufVWqFGDb3fupM4ll1CjWbOQ2717vN7i544Eq16d/QcORKYtY4w5\nTSy4MKf8sqBjAAAgAElEQVTV+PHjcRkZ9GzbVkcIfvgBpk6F557ToMK/U2u4du/WhFF/kBEJycl5\nR1T8OneG22/XvJELLuBwpUr8T8+eSGwsa9euPXlaXGxs8XJHQjl8mNo1a0amLWOMOU0suDClYu7c\nubjDh/nj4MGwd6++mf/5z/DCC5CeDkuXhteQf4WJP8iIhGbNtABXKJ07Q6tWcM01MGiQntu4MZd3\n6YLUqMFDDz3EhYmJxKSnR6QrMenptExIiEhbxhhzulhwYUrVc889hztyhAd++1tdpZGeromZ/qJa\nhfGvMCnJMtZg/mWz+bnlFtiwQVelPPWU5pFcey3ExvLU1KkMHzGCWvPmRaQrjVauZFD//hFpyxhj\nThcLLswZYfz48TiPh/UrVlDh6FHNeZg6FZ58suAcDP8Kk+IuYw3l8OGCA5XgvIz4eBg5En77W50y\nufpqMrZv1z1XNm8ufj9SU2kTH2/LUI
0xZY7VuTBnlNatW5Pjq/x50UUXsenTTzX5s0ED6Nnz1BLe\nK1fqiEeXLvp1ixYl78T27VC/fsHn+PMyunXLe+zECfjXv+CCC2DbNhg9WneQTU6G++4rUjfqfvgh\nj40aVfT+G2NMKbPgwpyxvv32WwBuuOEG/rNokU6ZxMbqSEGdOhoA9O+vha/OP1/3KClh2W5Al8OO\nHl3wOc2awWefnXq8SxdYu1bLmDdsqAHGe+/p0tsFC7T/bdvqc2jfXr8OoeqKFXSsWdP2yzHGlEkW\nXJgz3scffwzAihUraN+lC+zfrxucVaumu7PWqwf//nfuMtaSbtceF6fTGwUpKC/jttvg7bdh1Cgd\nSencWfs1a5bmaixZolMq//qXFu3q3j1PtdKqK1Zw5fr1zJw9u/jPwxhjSpHlXJgyo127drijR3H7\n99M9KUlXiGzcCF9/DatW6UhBUZaxhvL++7octjAF5WUkJOhqk+BaGSNHwv/9nwYc116rSajLl+ve\nJuPGwZYt1J00iW6pqSyaPZuKFS32N8aUTRZcmDJp3rx5uAMHuKVDB9i5U3dF/egjnTpZvrx4jS5b\npu1ccEHh5xaWl3HDDaFrZSQlQc2aGhA5p9MilSrptMnvf8+rAwYwe+pUCyyMMWWa/QYzZdqcOXMA\nWLJkCcmdO0N2NsyYoYmVRcm/WLYsdxQhHIXlZSQk5M3J2LwZ3nxTczAqVtSRjBtu0M/+KZZt2+j9\n0ENwzz2MfeABxowZE37/jTHmDGLBhTkrdOrUCXfiBCkpKSR37Urmyy/raMCgQQXnYKSm6lRITo4G\nFjExhd8snLwMf8CQkwPjx8OPP+oGbm3aQJ8+ofsUkJ/x8IwZPPz889zRrRtvvPFG4X0yxpgziAUX\n5qySlJTE/r172bNnDw0aN4Yvv9SEz169dDQhcBnr4sUaJPToEd5UiF84eRmHD2vC6RNP6J4n1arp\nUtpwdmyNj9dA55NPeHPaNN5s0IBrL7iATz/9NPw+GmNMKbLgwpyV6tevj8vOJiUlhauvu47jL7+c\nu4x13z5dujpqVL5LQfMVbl7G9u3wzTdQubIGFv/7v0XfCv766/Xza6/x2bZtSO3aNK5enV9++aVo\n7RhjzGlmCZ3mrJaUlMSxrCwyfvyRC2NjYd06XcmxerVOmxSFPy9j2LDCz12wQIOKqlWhceOiBxZ+\n118Pl18OgwfD/fezo3ZtpGFDYipWZMuWLcVr0xhjosyCC1MuxMXFsemrr3CHDnFXjx6QkQGvvAJj\nxxa+xXtqKkycCOvXh5eXkZqqUyGVK2ueRa9eJet8375aHyM5GSZNguee40SnTpx31VWICJe1a8fQ\nUaOYOHkyHo+nZPcyxpgIsGkRU+5MmzaNadOmMWTIEKbMmKG1MurVg9694dxzS56X8d57uiLk8OHc\nlSElkZCgwdDBg5pEGh8PDz2kG7u9/jobvvuODRs2QIsW/H32bC5r1ozHhg0jKSmpZPc1xphiEudc\nafehRESkDZCSkpJipZJNsbz99tvcdvfdOtJwzjm6jPWii+Cyywos0R3S0qVaLbRxY/36sssiU5J8\n0SLdQyVwLxP//Vau1C3g33tPt6/PyqLuDTfQITaWd6ZMsZoZxph8rVu3zv+HSJJzbl2k2rVpEVPu\nDRw4EJeVRcbWrTSrXFlrZXz9NVSpUvTAYsUKLeUtoh/NmkWmk/6qn8E6doTatXU0Y9IkeP55uPJK\n9i1cyL/mzKFz375kZ2dHpg/GGBMmCy6M8YmLiyNtyxbcnj20b9UKJk8uWk7Ghg2ak3H0qAYo2dkF\nb91eFAXtZdKzJ8yfr//2L2P93e+gShWWL1hA9cL2STHGmAiL+nipiPweGAk0AjYC9zvn1oRx3bXA\nMuAr55zNd5jTapmvdPd1113HZ3/8I9StG35ORrVqcOyY/ju/gKCoCtvLxOPJzcmAPMtYj1eogNSr\nR0
xWFj98/TXNmzePTJ+MMSYfUQ0uRKQf8DxwD7AaGAEsEJELnHN7C7iuFjADWAQ0jGYfjSmIv3DV\n0KFDeXniRN2NtWpV3Q+kVavQtTIaNNCdW0EDkBYtSt6R1NSC9zJJTtalsoE5Gddfr0XErroKGjYk\nZ9s2zrvpJtizh6G33spLL71U8n4ZY0wI0Z4WGQFMcc694Zz7DhgCHAbuKuS6ycA/gS+i3D9jwjJp\n0iSc18vA5GTYtUtXb6xZA1+E+BFt3153bN29W0c2ImHJEujQIf/HmzULnZNx663aR39p8UmTYNw4\nJm3ejDRoQKtWrSLTP2OMCRC14EJEKgFJwMnfrk6XpiwCri7gujuB5sDj0eqbMcX1z3/+E5eVxfK5\nc3V0YsqUU/MyYmN1GqV6dV2BUljORmFSU6FOnfD2MgkWuIzVz5+XcffdfJeZidSuTcVq1awolzEm\nYqI5clEPiAF2BR3fheZfnEJEWgB/A25zzp2IYt+MKZF27drhsrJwe/fS7vhx+OMfNYly0SL4/nud\nisjO1m3V584t2c3mzAlvL5P8cjI6dcq7/bvXCx9+qNvTn3ceVKtGTtWqnPfrXyPnnMO4ceNK1l9j\nTLl3xqwWEZEK6FTIo865n/yHS7FLxoRl+fLluN27ubllS3jxRXj0UXjtNdixQ9/0d+zQsuHFsXCh\nLmkNZy+T/HIy/MtYN2+GZ5+FZ57R3JE2beDCC+GKK+DXv9brq1blj3/5C1KnDsOHDy9en40x5V7U\nimj5pkUOA7c45+YFHJ8O1HLO9Qo6vxbgAbLJDSoq+P6dDVzvnFsW4j5tgJR27dpRq1atPI8NGDCA\nAQMGROopGROWQYMG8eY77+j0SKVKmuAZEwM331xw3kSwhQvhs8/gkUcKLzk+ZgyMHh166mTbNvj7\n37X+Rq9eGvDMm6clypOTNV8jcAXM++9rvsjhw1ChAle2bMkXoXJLjDFlysyZM5k5c2aeY5mZmaxY\nsQIiXEQrqhU6ReQLYJVzbrjvawFSgYnOuWeDzhUgOLvs90BH4BZgq3MuK8Q9rEKnOSM9/vjjPDZ2\nLNSsqctVK1XSEYg+fQouCZ6aCm+/rec/8EB4e5m8+y6MHHnqYzk5Wir8+us1sJkwQUuS9+pVcB/S\n0mDGDPjqK71/Vha/qluXH3/8MaznbowpG6JVoTPawcWtwHR0lYh/KWof4ELn3B4ReQpo4pwbnM/1\njwI3F1TnwoILc6abN28evW69lRP+BM9q1TTh84Yb9A3eP2qQmqojCnXqwMCB4e9lMnGibuke6vxx\n46B1a13B8te/akXPouzQunChrnhJTtYRjYwMzjl4EK/XG34bxpgzVrSCi6jWuXDOvSsi9YAn0HoV\nG4Cuzjn/mrlGQAl3dTLmzNajRw9yjhzB4/GQmJjIgawsTaqcMkUDi5wcDSjatoUnnyxayfFly/T6\nUIHF5s06StGpkwYZRQ0sALp00ZGLjRvhpZcgLY2Db72FNGgA+/bx4bx53HTTTUVr0xhz1ot6Qqdz\nbpJzLtE5V805d7Vzbm3AY3c65zoVcO3jVp3TnC3i4uLIzMzEZWaSlJioVTyd071Bdu/W6ZOiBhbL\nl8OwYaEfnzdPpz/8QUZRAwu/Tp00wFi/XoOMhg01f6NBA7oNGoTUrMk111xTvLaNMWcl2y7RmFKw\ndq3G2G3btuXTH37QAOPtt7Uw16BBhedkvP++jlg8/HDonAyvVxM24+N1hUj//iXrcM+e8MQTcNtt\nOsLStatO5WzbBgsW8Pn27UhcHI2qVmXHjh0lu5cxpsyz4MKYUrRy5UoAXnjhBUaMGQP79umOrPXr\na22LhITC9zIJZflyHXEIDDJKIiFBRywuvzzvihR/5c+0NHjrLXZ+8w1SqxYVDh4kJycHAI/Hw5uz\nZvHd1q14vF7iYmO5MDGRO/r1Iy4urmT9MsackaKa0Hk6WEKnOZt4
PB4SmzfnwMGDUKuWbvtetaoW\nu/InZoYzdfL445oUunmz1rTo2LHknVuyRIOcwP1Lgn3yCUybpjvDZmZSoW5dGl96KTvbtiWnadOT\ngVJMejqNVq7ksmbNeGzYMH9CmTHmNCuTCZ3GmKKJi4sj07fpWc2aNfEeOwbHj+v0Q40acOmlBQcX\n/mWse/boG/nu3TqNEQnNmmndjYJcf71O00yfDjVqcCIri/TNm+H//g8aNz55Wk6LFqR36EB6Whqf\nP/MMHWJjeWfKFCpWtF9JxpwN7H+yMWeoAwcOAHDllVeyetMm3VZ9wwadMilsGeunn+rxrKz8y4IX\nVX77lwRLToZ16+DAAQ2Kjh+HESM0R6R2bc3d8Aca8fHsGzqUj1asoHPfviyaPdsCDGPOAva/2Jgz\n3KpVqwC48MIL2bxtm+ZRTJ6stTKOHdM37YsvzruM9fvvNUejWrXwAoJwFLR/SbB+/eCxx+DOO/NW\nAN22TYOL/fs1yfTmmwE40q4dq4ABQ4Ywe+rUyPTXGFNqzpi9RYwxBfvuu+9wWVn0aNsWjhyBvXsh\nMRGaNNH9S1JSck9u316TPxs00CAjEgravyRYQoKOTlx+uSZ9Nm2am/z58stajvzrrzXA8G2qdqRd\nO5YeOEBK4PMwxpRJFlwYU8Z88MEHOK+X9YsWUeHzz+GnnzS3YuZMGD9eV27ExmpC6Pnna5ARCYsX\nF21vlOTkvLuxBoqP1/1Q7r0X3ngDeveGESPY17Ejj0+cGIneGmNKkU2LGFNGtW7dmpws3W7nuuuu\n47ONGyEjIzcvo3Vr+Pe/NchISyvZctTUVF0CG2pjtPyEkwDasaPu+vrxx3qPxx5jvtfL/+7axX/+\n85/i99cYU6ps5MKYs8Cnn36K83r58113wcGDcOgQ/Oc/sHq11qeYO7dkN3j/fa2tURThJoB26KDT\nJjfdpCti4uNZsGYN0rAhjQNWmBhjyg4LLow5izzzzDO4fft49u67daVIhQoaZKSna2Gt4iho/5KC\nFCUBtGdP2LlTR1tuuw2uvhoqVGDnhRci9eujmyYbY8oKCy6MOQuNHDkSt28fH0yeDLt2wc8/6xbq\nS5cWraHC9i8pSFETQD0e3Sht+XLIztZlq5s3a4BSqxZSvboFGcaUEZZzYcxZrEePHricHDweD3Xq\n1NGVGp9/DnfcUfL9SwqzeDGMHh3++Z06wSuvaF2M3r3hlltyp1bS0uC99yAjAznnHDh0iJ9//pnm\nzZsXvV/GmKiz4MKYciAuLg5/qf9atWpxYONGLbh1yy2n7l/y/vs64tCvX9GnQvyKkwAaH69lzu+/\nP+/xHTs0MfToUahcWdutXp3zLr0UjhzhD/ffz/PPP1+8fhpjosKCC2PKmczMTAASEhJIe+klrUfh\nnO5h0qYN/L//pxU+ixtYQPETQCsEzNR+8omWMndO63UMGJC3Kum2bfDee4x76y3GvfoqLRo14vvv\nvy9+n40xEWPBhTHlVGpqKgDt2rVj5Xff6Zv2V19pcSsRzc8ozoZnJU0APXYsd/Ti0kuhT5/QUziB\nO7LOmMEPGzeenDIp6xsyGlPWWXBhTDm3YsUKAF599VXueeABHcEA2LpV3+i7dg2/MX8C6MMPF70j\n27frlMc992gyZ69eWmm0MPHxer+FC+Gjj+DIEaRBA9izh4yMDNvW3ZhSYKtFjDEA/Pa3v8UdOkTG\njz9S4/hx3f9j6lQYO1ZHBwqSmgoTJ8L69SVLAP34Y2jUKPzAIlCXLloro2VLeP55SE6mTosWiAgv\nvPBC0ftjjCk2G7kwxuQRFxfHQd+OrAMGDOCdd9/V6ZL69fVNPzDvIZIJoEeO6GZsjRoVPbDw69IF\nvvxS8zWaNNE2f/yREY88woiHHqJW5crs921pb4yJHgsujDH5mjlzJjNnzuSqq65i1bff6kjGRRfp\nSpOjR3UlR6QSQDMzdeVKr14l
63TfvjBpEtx9t07pBCR/Zno8SLVqcOSI5WUYE0U2LWKMKdQXX3yB\nO3CAu268ETZu1CmTvn11pYnXq0WvSlIB1OvVZabHjpVsDxTQAKVCBe1b4G6skybpbqzt2kHDhkhc\nnBXlMiZKLLgwxoRt2rRpuL17GVC/PjzwAHz/ve5b8pvfaJCQ3y6o+Vm6VIOSChV0B9fk5Mh0tFOn\n0H2Jj9fCXr/9LdSsCeefj9SujYjg8Xgic29jjAUXxpiie/vtt3H79rH8lVfg22+1smaFCjo9MnFi\neAmg48bpDq4PP6ylv0V0J9VISEiAPXtCP+b16r4rLVpAxYp6bp061ElMRKpU4b777otMH4wpxyzn\nwhhTbO3atcMdOwagUwwNG2qyZ3q6ntC1qwYM/gTQ1FRdMnriBFx2GQwcqOcdPapTK+FudFaYUDuy\nbt4M8+ZpbkdysiagBhXkYv9+Xpoxg5fefJN6lSuzJ78AxRhTIAsujDER4U+QbNCgAXv27YNKlXQv\nkypVtG5FQoKu4OjfH375RZed+oOLKlV0FCGcLdrDEbgja04OTJig7ffvX3hBrlmzYONG9mZnI9Wq\nUU2Ew5HqlzHlhE2LGGMiavfu3bjMTIb07aujBIcO6XbqR47o1xs2aEnvY8dyp0/q1NFj27dHphOp\nqbo8NicH/vpXSErSnV0LSxaNj4eRI3WlSfPmcO21ZNWocXJHVsvLMCY8FlwYY6Li5Zdfxh07xgcz\nZsC+fbrKxOPREYJu3bSs99tv68mDB8OPP2ohrUhYsgQ6dNARi44di143o0MHuP56qFFDK4bWrAl1\n61LnvPMQEV599dXI9NOYs5QFF8aYqOrRowfOOTL8wcMf/gBDhmgC5w8/aCDQuLHmYVSuXHgyaGFS\nU3UkJD1dp0KKW5CrQwetNJqQAPfeqyMhLVvChRdyz5gxiAhdi1Ia3ZhyJOrBhYj8XkS2iEiWiHwh\nIpcXcG4vEflERHaLSKaI/FdEro92H40x0eff9t3t2UNCVhb84x9aL2PKFA06evfWqZO5c0t2ozlz\ndEfWefNKXpCrZ0+YP19HP1q10uWyXq/mjiQm8smGDUiNGtSqVatk9zHmLBPV4EJE+gHPA48ClwEb\ngQUiUi+fS9oBnwA3AG2ApcB8Ebk0mv00xpxe27Ztwx04QOPKlSEjQwOMdet0+egvvxS/INfChToi\n0rix5ndEoiCXxwMHD2qg8cUXcNddWpU0ORmuugratOFA7dpInTpWlMsYn2ivFhkBTHHOvQEgIkOA\nm4C7gL8Hn+ycGxF0aIyI3Ax0RwMTY8xZ5JdffsHj8dC4TRuOLlsGtWrBgQM6euGcTk2Ea+FC+Owz\neOQRXYkSqYJcyclakKtbN83BWLVKk1STk+HKK/PuszJ3LtKoEezaZeXFTbkWteBCRCoBScDf/Mec\nc05EFgFXh9mGALFARlQ6aYwpdXFxcXRu25aPunTRHImhQ/WN+u23dZfVPn0KHoFITdVzK1XSwCIm\nBnbvhrZtI9PBZs20ONi4cdqPfv3yX87asaPmjMyYcXLb959//pnmzZtHpi/GlBHRHLmoB8QAu4KO\n7wJahtnGn4AawLsR7Jcx5gzz+PDhfPHMM+wbOjQ352LsWH1TX78eGjSAG27IuyNraip88IHupDpw\nYN6N07KyIluQa/VqGDAgvOTQ+HitOvrJJ/Daa5x36aVw5AhtLrmElJSUyPTJmDPcGVtES0QGAo8A\nPZxze0u7P8aY6ElKSqJDbCwfrVzJEf+Iw8MP6+cHH4TvvtPRjJgYXVHSsCGce65+jBx5aoPVqkW2\nIFezZkVbdeL1ah2Piy/WJbY5OaxLS0Nq1gSv16ZMzFkvmsHFXiAHaBh0vCGws6ALRaQ/8ArQxzm3\nNJybjRgx4pSM7QEDBjBgwICwO2yMKT3vTJlC5759WeUcR9q1y33gmWf08+jR+kZdqZIW5crO1m
mU\npUt1OiJQgwYajLRoUfKOpaZC69bhnRtcYrx//7wlxt9/HzIykNhYOHjQggxzWs2cOZOZM2fmOZaZ\nmRmVe0k0f7hF5AtglXNuuO9rAVKBic65Z/O5ZgAwFejnnPswjHu0AVJSUlJo06ZN5DpvjDntsrOz\n6X/vvSzzetnXvXvo3IZ583T79HPOgUaNdITg1lu1OJef16tBydixJe/UqFE6inLOOfmfE1hivFev\ngnNE0tLgzTe1Uun+/XD8uAUZptSsW7eOpKQkgCTn3LpItRvtOhfjgN+KyCARuRCYDFQHpgOIyFMi\nMsN/sm8qZAbwR2CNiDT0fdSMcj+NMWeAihUrMmfaNBY8+CDdFy2i6RNPELN0qW7tvn07fP89MbGx\nNL3mGtpddx18840uZZ0+HZ57LrcAV2ysrjyJREGuevUKDyyKWmL8oYfgd7/T6ZZ69ZC6dRER/vzn\nP5esv8acIaI6cgEgIkOBP6PTIRuA+51za32PvQ6c65zr5Pt6KVrrItgM59xd+bRvIxfGnKU8Hg9v\nzprF5tRU9h84QO2aNWmZkMAd/foRFxd38jwR0emHWrX0o08frfj59dcwIniFexE895wW5ApMFg02\nbpwGFsWpBLp4McycmZsfcuSI1tSw0QxzmkRr5CLqCZ3OuUnApHweuzPo646hzjPGlE9xcXEMGzKk\n0PP8b8Qiom/UkybplEl2tr6BF6fmxaJF2lZBgcXmzSUrMZ6cDF9+CZdfrkmqaWlao2PXLsRX88OC\nDFMWnbGrRYwxpqjyBBler45ivPaaPliUAGPZMnj33dwVK/mZN0+TNkvillv0XiNHagJqp04aZMyZ\nA99+i8TFwf79FmSYMsU2LjPGnHWcc7hjx/hNkyZaTvzll+HJJwvPwUhNhYkTtbbGFVfA0aP5n+v1\nRr7EuF98vE7n3HabLretXx+Ji7Py4qbMsJELY8xZa+NG3TVgw4YNXHbZZToFUaeOjhYkJOQt3b14\nMcTF5eZYvPGGBhv5LWddvjw6JcYDdeigCaPvvKP9rFHj5HTJn/70J/7+91N2UTDmjGDBhTHmrNe6\ndeu8UybPPw+1a+teIeeeC02bwqBBuunZ9u3w4ov6+Lff5h9ARLrE+GefhX4sOC8jNRXmzOHZ6dN5\ndsIEOHbMpkzMGceCC2NMuZInyKhVC6pW1UBi0SIdyahfX3c9/fRTnRZJSws99RHpEuMFVRQNzstI\nTtZ+vfEGbNyI1KgBhw9bkGHOGJZzYYwpl5xzuP37qfrTT7BiBXz1lY4gXHCBVvj0eKBJk9y9ToJF\nusR4QYFKfnkZY8ZovYyGDTUv45xzLC/DnBFs5MIYU65lZWUBcOONN/Lx+PE6VVKpkgYZq1bpm/7y\n5acuN41kifHt23XEpCD55WV07KjTOR9/DOnpUKWKlRc3pc5GLowxBvj3v/+NO3yYn1evhh07NM+h\nXj0NLt56SxM+A7Vvf+qx4lq8WJM3C9KsGezZE/qxDh00b+T66zXYiY+HBg2QKlUQEc6/8EI8Hk9k\n+mpMGCy4MMaYAM2bN9cpk4wM3RTtm2/g+HHdDyRwOWskS4zHxRVcYhwKz8vo2VNHLo4fh/PP16qh\n558PcXH8tHUrderUQURs23dzWti0iDHG5CNP8mfNmrB6tZYUb9oUbrxRd0udM6dkJcbff1+Xvxam\noLwM/26sv/ySu3GaPxjxF+TyeODwYf7nmmvg2DGOHz9OxYr2FmCiw36yjDGmEHmCjAoVdMv3uXM1\nL+PHH4tfYnzZMq1jUVCJcb9QeRmBu7H27x96VUtg1c9//lMDkSpVqNS4Mezda3kZJipsWsQYY8Lk\nnMN5PFzZrBls3Qr//a++wc+fDwsXFq2xZcs0UXTYsPDOD87LKM5urKNGwcCBWl20TRu4+OKTO7Ja\nToaJJBu5MMaYIvriiy8A2LJlC+e1aKH5ElOn6uqSwYMLfq
NPTdWpkJwc3bskJqbwG4bKy5gwQVeK\nFHXTtC5d9J5ffAHDh5+cNqlz4YWwezdJSUmsXbu2aG0aE8SCC2OMKabmzZvjsrMB35TJypWak9Gg\ngSZYNmtWcInxcAXnZZR0N9ZOnbSfJ07ov/3TJjNmkLJuHVKlilX+NCViwYUxxkRAnryM/ft1s7Qa\nNXQUY98+XbkxapSuMimKUHkZkdiNtWdPrfrZsqV+HR+vIymffALTpoHXi1SuDMePW5BhisxyLowx\nJoKcc7gjR2DvXsjI0P1JnNOVJp9/XrTGQuVlRHM3VoCrr9YdYePitM5HvXpI1apW+dMUiY1cGGNM\nFPj/2m/dujUbv/lGa2JMnQopKXD77cXPy4jWbqz+5ayZmXq8R4/cKZ1t2+C995D69WHvXtuR1RRK\nyvpwl4i0AVJSUlJo06ZNaXfHGGNC2rJlC+edd55Oi9Srp1MmN99c+NbvwV57TXdjjUTZ8e+/1w3a\n9u/XHA5/jYz8+PIy2LBBp3rApkzKuHXr1pGUlASQ5JxbF6l2beTCGGNOA3/lT/DlZdSpA6+/rvuC\nxMbqLquXXFJ4Xkakd2NdvRoGDAgvOdSfl7F4sfb92DFL/jQhWXBhjDGnWZ4go0YNOHYMfv1rHQ1I\nSSl4n5FI78barFnRV50kJ+tUzWefaT7Jpk1InTrg8ViQYQALLowxptTkCTJWrdJAIzUV1q6Ffv1C\nT4xxuRAAACAASURBVFFEcjfW1FQtYV4cHTro9EhsrOaGbN4MjRohNWuC12tBRjlnwYUxxpQy/xvx\nrbfeyuw5c+DAAVi3TnMvevXKm5chAu+9pwW0SmrRIp3mKK7evWHSJLj77rzJn//6F9KgAezZQ8eO\nHVmyZEnJ+2rKFAsujDHmDPHuu+8CAcmfhw7Bq6/q6EDjxpqnUb9+7m6sJVmOmpqqiaWF7cZakIQE\nnR5p3Di3nRYtoHPnk8mfS9evRypWhJwcG80oR6zOhTHGnGFObvuemQm7d+tW6ps2wZo1urrj8GF4\n552S3eTdd8PbjbUw/uWswfzJn0OGnAyKpHZtq5dRTlhwYYwxZzDnHC4rS5M99+7Vjzp14OhRrXlR\nHIsWaYBSlBLk+WnWDPbsyf/xLl3g3nt1N9m4OIiPR845x4KMs5xNixhjTBmQJ/lzyRKd0vjuO8jO\nLlpRrWXLdNSiJLkWgfy5FgVJTtbkz927dUrn66+hYkWkRg04fNimS85CNnJhjDFliHMOd+IEv+3Z\nU6dLJk+Gv/1NcxwKkpoKEyfC+vVa3vvo0ch06PDh8Opu9O2rpctvuQXuuktHX+rVg4YNkRo1EBHO\nP//8yPTJlDobuTDGmDLolVde4ZVXXgF8oxkbN+qbde/emu9QUNXPDz+M3HLW7ds1ybQwCQlQs6au\ndBk9WoObBQugeXPdgyU1lZ88npPTJTaaUbZFPbgQkd8DI4FGwEbgfufcmgLO7wA8D1wMpAJPOudm\nRLufxhhTVuWZMtm2TetlxMRogaukpFOrfrZvD888E5nlrIsXa7AQjuuvhzffhKee0pUwocqfz50L\nu3ZZkFHGRTW4EJF+aKBwD7AaGAEsEJELnHN7Q5yfCHwITAIGAp2BqSLyi3NuYTT7aowxZV2eIKN2\nbQ0wVq3SUYJBg3KXrsbGRm45a1xc+MtZmzXTe+e3cVujRhp0/Pyzro7JyECqV4esLAsyyphoj1yM\nAKY4594AEJEhwE3AXUCoLfV+B/zsnPuz7+vNInKdrx0LLowxJgyn7GGyZo2+WTdqBDfeqG/yV10F\ns2fDH/5Q/Bu9/37RlrNWrw4tW54aWATvyHrDDZqbEbgjq68olwUZZUPUggsRqQQkAX/zH3POORFZ\nBFydz2VXAYuCji0Axkelk8YYcxbzvxG3bduWTz//HI4c0WmHCy7QHVGPHNFpjeJs4b5smZb9Lspy\n1uDkz5wcmDBBd2Tt3z
/0aEZgUa433kDq1YN9+4iLiyMjI6Po/TanRTRHLuoBMcCuoOO7gJb5XNMo\nn/NrikgV51yE0puNMab8WLly5cl/i4jWpahfX3MzPvwQTpzQehThWrZMa2wUdTlrYPJnTg789a+a\n9xHujqxjxsDChTB1Kp4TJ5CqVeHoURvNOAOdNatFRowYQa1atfIcGzBgAAMGDCilHhljzJknz5RJ\nXBxUrqx1L1avzpuXEUpqqk6F5ORoYBETU7SbByZ/TpgQfmDh5/Vq/shFF+lUSpMmcPCg7cgappkz\nZzJz5sw8xzIzM6Nyr2gGF3uBHKBh0PGGwM58rtmZz/kHChu1GD9+PG3atClOP40xptzJE2Ts368V\nQL/+WnM0gpezpqZqVc969XKXsxZVYPLn5s06FRJuYBGckzFwoK6E+ewznS75+WeoUgWJjYWDBy3I\nyEeoP7jXrVtHUlJSxO8VteDCOXdcRFKAZGAegOjaomRgYj6XfQ7cEHTset9xY4wxEZaRkcElffqQ\n3rEjPPKI5mG89ppOXzRtqgGFc/p55Mji3ygw+XPePM2xKEyonIzNm7Utf6Bx3XV5g6A5c5D69WHv\nXjIyMoiLiyt+n02xRXtaZBww3Rdk+JeiVgemA4jIU0AT59xg3/mTgd+LyDPAa2gg0ge4Mcr9NMaY\ncunNWbPY2batvkkvXapTDz166Jt1RgY0aKCrN9LS9PHi1MYITP70ejUwKGwJbHBORk4OjBtXePJn\ncvLJ5M86LVue3PfERjNOr6gGF865d0WkHvAEOr2xAejqnPPvctMIiA84f6uI3ISuDhkGbAfuds4F\nryAxxhgTAd9t3UrOr36VeyA2VoMI0Df2/fvhl1806dO/l0lRkj+XLoUVK3KTP5cvD291SmBORnGT\nP5cu1aJdBw4gMTFw4oQFGadJ1BM6nXOT0KJYoR67M8SxFegSVmOMMVHm8Xrz3xvEH2R07aorSypU\ngNdf11Lj/foVnvz5zjs6zRKY/Ll7N7RtW3CngnMyipP8CXqNCPznP7oPS3Y2UqsWHDhgQUaUnTWr\nRYwxxhRdXGxs4buaLlignzt31iDhs8+0TkadOlrSu1mz0HuZVKwI116bd1VJVlbhG50F5mQUNfkz\nWIcO8OWX8JvfwJYteuy775C4ONi/3/IyosR2RTXGmHLswsREYtLTwzt50SINNM47T1do/PADTJ8O\ns2bBW2/pY1u3wjXXgMejy0YXBhVXrlat4GAmOCdj3jzo1as4Ty1Xz546tRMbC336wB13QJUqUKsW\ndZo1Q0RISUkp2T1MHhZcGGNMOXZHv340CiiyFZbnn9dA48UXNXly3Tr9fOSITkM4p5uljR4Ndevm\n3Q6+QQMd3chPYE5GuMmfhUlI0GCnSxeYP19HW+65B1q10kCpSRP+p2NHJCYGEWHi5Ml4PJ6S3bOc\ns+DCGGPKsbi4OFo3bZo3AAhX48aalzF3Lnz7rRbiWr0aqlaFHTs0iGjdWkc2/Nq312mT/OzerdMs\nEH7yZziSk+GnnzTIOHgQOnXSwOeyy+DKK3XZbf36EBfH8D/9iTp16tB98GAb0SgmCy6MMaace3z4\ncOrOn1+iNur27UvKggW6ouTFF2HsWHj6aZgzR9/Uly3TEwN3ZA0lMCcjMNAoqf/f3p3HV1Wd+x//\nPBmEAmEsCgYQS1GxeisEq9SKxlS9VuFaCxdDpdZ5QhSvrbTYW1S4/rD+sHGqOOFQSRGpCOhPZKy0\nRq2JQ9VA7XVISEVRQoggNSTr98c6Bw4hwzkne+ck5Pt+vc4Lss/e+6yV6TxZ61nPGjDAL0vNy/PB\nzm9+4wuHHXIIjB7tc0OOOw6+9S1f06NvX5YtXszIkSMZd9FF7Nq1K5h2dBBK6BQR6eBycnI4OSuL\nZ9etY2dzKzka0PnFF8nt3p0RI0bsu+17167+pN//3k+X5Ob6OhpPPw1Tpux7s9icjHiS
P+PVpYvf\nzn3dOv8a+fn+dWIrfx533N6JqYsXw5YtLFq0iMyHH6ampoaMDL1txkOfJRER4Q9z5/L98eN5xTl2\njh4d93WdX3yR415/ncKFC3cfe+211+gzfjyfX3mlDyYyMvxUyX33QVGRT6jctctPe9RfBRLNyRg6\ntPnkz0Ts2AHvvAP/+Z9+pCKe3Vhzc/0Iy+OPw1tvkXnggVBZSUFBAVMaCoxkN2vva33NbARQXFxc\nrL1FRERaYNeuXZx72WWsra7m8zFjmq1j0WfZMnK7d6fwvvv2+ov+rJ/8hGdPPXXv62fN8kmgPXr4\nR5cufsXG2LE+/yGquhpmz/bTKsuW+ZGPZKqC1rdypd875eqrEyvIFbVihS+LnpbmR0AiG35t2bKF\nxxcsYP2HH1JZXU2vrCyOGDyYSRMmtIslrjF7i+Q450qCuq9GLkREBICMjAyeeughiouLuenOOykp\nL2fTiSdSm529e7ogvaKCfuvWMWLQIGZMm7bPH3WVlZW8UVGxb2Ayfbp/1Nb6ehm9evn6F/Pm+STQ\nH//YXxObk3HSST7QCCK4WLrUBzjJFuSKViVds8a38c03oa6O3gce6Edh7r9/r8/RbePGMXzAAGZM\nmRLKxmBtnYILERHZS05ODksefZTKykoeX7CADR9+yNZt2+jZvTuHDxrEpEWLGv2rfPdeJY1JT/dv\n0Bs2wOWX+6mJ7dv9apPoPibHHOMTQadO3RNotGQ5almZT9KsqGhZQa5TT4XiYl9349hjfY2Pww/3\nNTQmT4avvoI1a6gdOpSKk0+morycotmzOTkriz/Mnduh8jU0LSIiIoG5cto0fjdkiM9ZiEd0ozSA\n7t39VElmph/huOgiX6Ni+fKGkz/jdeut8KMfwTPPNJ5jEa+yMpg/39fxWLECnn3WBywVFT64qK31\nUybR0unsyUtZuXBhmwswwpoW0VJUEREJTJN7lTQkulHamjWwbZt/Y66q8m/Sjz3m38yjyZ/JWLXK\n37d//+AKcm3e7GtlnHoq9OsHl1wCt90GI0b42hmDBvlgJjcXbruNnaNH88rw4eRffnnLXrsdaVsh\nlIiItGtx7VXSmOhf+zNm+GCiVy+fkzFsmB+9cM7vFRKvtWth4UK49tpgC3KdcYa/91lnwcSJfiTj\nqqvg29/2QVF5uf//Bx/AX/8Kp5/Ozq++Ys348RQXF6c0ByM61RVNQN21fXsor6PgQkREAhPdq6Q2\n3mmRhsyYAUD6mjXU3nyznzrp2hUeegheey2+HVkXL/Zv9N/6lh8diWc31ngNGuQ3bwNfi2PDBj/1\ncuqpvn5GtFZGWZlPJO3SBbZu5fMVKxi5cGGr7MjaUBDx7gcfUJmWxqcnnUTtkCG+XaWloby+ggsR\nEQnMpAkTuG3cOCoSGWFoRL9163i7spKePXv6olxf+xr8+c9+L5NevXxi5aBBDe/IOnasr61xxx3+\nuaALcn3xBcyZ4/Mtbr658VoZeXl7amW8+Sb06YP17g2VlaEEGcXFxfy6oIA3Kir8Sp/Bg2HRIt/m\niy/et50auRARkbYuuldJRQArPEYMHEjPnj0BuOKGG3yi6KWX+ue3b9+z/HPgQL/9e9++ftfTykp4\n9FEfZBx9tA86gizIVV3tC3Ll58e38mTgQPjlL/fUyti1CwYMwPr0gS1bkg4yYkcntlRV8errr/OJ\nc+y44AI44gg/cpNMTY8AKLgQEZFA3XTNNbw8e7av0JmkPsuWMWPatN0f704UjeZl5Ob6j7/80r/Z\n9+vn37RranyQMW2anw6JFuU6/vg9lT9b6tFHfaXPZGplpKfDCy/4HWQ7dYIDDtgdZBx00EFs2rSp\nyVtUVlbyP7ffzpMrVrC5upovs7P9pmsnnOATSjdu9KXWe/b0AVgKAgtQcCEiIgELcq+SqH0SRaNB\nxqpVPt9h+3a/iqNPHzjnHL8ra1WVv6amBr75Tf+m
29KCXBs2+NobsVVFE3HKKfDWW742Rlqan1Yx\ng969+WTHDiwtDZxjy5YtzH34YRY//zz/W17Ol19+yVfArm7dcOec41eoxE4HPfaYDyjGjPHVTf/0\nJ3j55ZQEFqDgQkREQhDkXiXQRKJoXt6eVSC5uf7N9p57/DTIN78J3/gGHHYYPP98MAW5FiyACy5I\n/nrwwc+998LPfrYnQCgv95u5bdkC27fT++CD/ehGjx6+vYcdBuPHN78PytNP+9obNTV+JUuKqM6F\niIgELiMjg5ULF3LmRx/R5957G99iPaqsjD733stZZWUNFpuaNGEC/data/oea9b4/UiqqmDrVj/K\nsGkTnHYa1NX5bdeffjr5TlVX+zf/IGplpKf72hvZ2T44OOUUuOsuv0398cf7HJL+/X1S6ttvw3XX\nNf+6Awf6YmPDh/tVIAcf3LJ2toAqdIqISKji3qtkypQmf483uCFaU6JTIAce6JM7O3f2jzPOSG66\nYM4cX78iiHoZq1f7EYuzzmr4+VWrfHnx7GxfufSNN3yQdPHF8d1/1So//TJ1atPn/f3vcNlloI3L\nRESkPWnJXiWxEk4UjU3+/PRTP8XQrZvPT6irSyz/Ys0aeO89n9MQhAED9tTKaEg0gHniCTjoIB8A\n3H+/PxZPgJGX51e0bNjg9z9pZQouRESkVfTq1YspLSiBnXSiaCTI6LR6Nf+65RY/inH//VBUBJMm\nxV+Q68gjg62V0dzS2Lw8XzSsogJ+8AO/DHfu3PhHL84+G558UsGFiIhIU1qUKPq3v7GypoY333yT\nkSNH+kJYb73lg41zzoFDDmm8INdhh/kaFUHVytixI75AJT/f16pYuhSuv96XE8/N3WtjtEYNGuRr\nfnzxhR+xaUVK6BQRkXYjiETRnJwcnHOsXrIEPvnE7wHyu99BQQEUFvrpih07fK2M66/3gQX43I2N\nG4PpyMaNvh5HcwYN8nkiH3/sg4SJE31Nj3jl5fl9UFqZRi5ERKRdycjI4KmHHoo/UXTatAYTRXNz\nc/nRhRfy9MEHUzdzJvzrX75WRu/eflfT2FoZGzfCunV+eWhLa2WAHxX5xS/iO/fss+G55/Zslta7\nt58mee655q9tLrcjJAouRESkXQoiUfQPc+eSN24c66ZOxb36qk+AfP99uPNOP5UQrZXRty/ceCPc\nd1/La2WUlfnplninKgYO9Dkfmzf7j885x4+yxCOe3I4QKLgQEZF2rSWJohkZGax66ikmXHIJT2/e\njJszZ0/gkJvri1F16gTf/74vJz52rK+VMWVK8g1evNjfJ15duvjS5tEgYeBAv0tsPOLN7QhYaDkX\nZtbLzJ4wsyozqzSzB82s0c+GmWWY2Wwze8vMvjCzCjN71Mz6h9VGERGRjIwMFs2bx2O/+hUZTzyx\n54k1a+Cpp/wUxnXXwRVX+FGHysr4EiobsnatH4WI5nHEY8cOXyY8GiR06eJLh8ejvDy+3I6AhZnQ\nOR8YBuQBZwKjgblNnN8FOAa4CRgO/BA4HHgmxDaKiIgAcN555zG6SxcfTMRaswYWLoT16/10yZtv\n+umRF15I7AXWrvV7fiQ66lFe7it6RoOEHTt8nY54LF7sA5NWFsormtkRwOn4il+vR45dDTxrZtc7\n5/bZ9s05ty1yTex9JgOvmNkA51xAKboiIiIN6xQNLtLT4eST934ytigX+JoTr74K558ff62MG2/0\n907E4sVwwAF72lNe7jdqa05ZmW/Xu+/6gGjixJaXLo9TWOHMKKAyGlhErAQccBzxj0b0jFyzNdjm\niYiI7K2yspK3Pv7Y15UoKPA1MH74w33fkKNBxvTpPhB5+22/THXsWL90NJpE+dFH/vmvf31PrYxE\nlZX5FSqHHLInAfSPf/Q5GM1ZvNivNPnnP32bFyzwUzp5eX4VSZcu/v4hCCu46Ad8GnvAOVdrZlsi\nzzXLzDoB/weY
75z7IvgmioiI7PH4ggVsOvFEP7Jw3XV+5UhDb8jRpang9yhJS/Nv3hs3+poUQ4b4\nFSaffeYTL6+/PvlGFRb6rdSjCaBlZX7ztOaWoUZzOzZuhJUrYdIk0jdt4sDVq+m9Zg1HHnEEmZ06\nUbNjBwubvlNSEgouzOxW4IYmTnH4PIsWMbMMYGHkfnEVkZ86dSo9evTY61h+fj75+fktbY6IiHQA\n6z/8kNohQ/YcOPxw/6iu9rkS0eJaXbr4/Idp0/wKkjlzYMYM+N73/KqSf/3LrzA5/3yYNctvUnbK\nKYk3aNUqH+AcffSeUY/58/1Or01ZuxaWL6dLnz70X76cY3Ny6F1ezheffMKn3bqRmZnJzk8+YSdQ\nVVWVeLvikOjIxe3AvGbOeR/YBBwYe9DM0oHekecaFRNYDAROiXfU4o477tCuqCIikrTK6uqGl21m\nZTW+eynANdfAzJngnB8lKCjw5bpLSvyUyLx5/rlEdlON7op66KF7EkBXrfL3bGylSlkZPPkknTds\nIG/UKG6+9tpm3xdLSkrIycmJv11xSii4cM59Dnze3HlmVgT0NLPhMXkXeYABrzRxXTSw+AaQ65yr\nTKR9IiIiyeqVlZVcwan0dJ+oGc3TGD8ezjwTfvtbv8IkI2NP8ud55zWf/Dl/vt/R9N/+zU/PpKf7\nwOL++30J8BUr9t4HpayMrz33HAdlZjIuL49fPv54XLvMhimUnAvn3HozWw48YGZXAAcAdwGFsStF\nzGw9cINz7plIYLEIvxz1LCDTzA6KnLrFOVcTRltFREQAjhg8mPSKCmqHDk384vp5Gps2+WTQ5csh\nO9uPOLzwgl+10aePr7I5cOCeAKG83Bfn2rrVT7n86ld+KiQabJSU+MTOrl2huBieeIK+WVmcfPzx\nfC8nh0mrVqU8oIgV5uLXicDd+FUidcBTwDX1zhkKRBMlsvFBBcAbkX8Nn3eRC7wYYltFRKSDmzRh\nAreNG0dF/SWoiYjmadxwg8/VOPJIHxgcfTT893/7x7vv+mCic+c9NSgyM2HECPj3fwcz/3xBgU/e\n3LTJb1a2axc9+/XjvKOO4ubCwjYVTNQXWnDhnNsKnNfMOekx//8ISHDxr4iISDB69erFMdnZVASx\nd0jfvn7zM4CLLvIjGkuW+OmMww+H117zq1A6d/Y1K7p1g5dfhlde8QWytm/3UyBduzJ81Ch++pOf\nMGnChDYdUMTSlusiIiIRN11zDX2WLm3ZTRraO+Tww+H66+l0yikM2b6dCy+9lKHDh/vnunXz0yNm\nPrDYuZPhRx3F+++/j/viC0peeokpl1/ebgIL0MZlIiIiu+Xk5HByVhbPrlvHzhNPTPwGje0dUlZG\nn2XLyO3encKXXyYjBSW5W9P+3TsREZEE/WHuXL4/fjyvOMfO0aPjv3DFCl/c6tJLffGqHTtIr6ig\n37p1jBg0iBnTpnWYkgkKLkRERGJkZGSwcuFCzr3sMtbeey+fjxnT7PLRPsuWcULnzpx88cX8o7yc\nrdu20bN7dw4fNIhJixa1qymNICi4EBGRBjnnMLNUNyMlMjIyeOqhhyguLuamO++kpLycTSeeSG12\n9u7lox11VCIeCi5ERGS36upqpk+fztKlS6mpqSEzM5MxY8Ywa9YssrKyUt28VpeTk8OSRx+lsrKS\nxxcsYMOHH3b4UYl4KLgQERHABxajRo2itLSUurq63cfvueceVq9eTVFRUYcMMMAvU51y+eWpbka7\noaWoIiICwPTp0/cJLADq6uooLS3lxhtvTFHLpL1RcCEiIgAsXbp0n8Aiqq6ujiVLlrRyi6S9UnAh\nIiJs27aNzZs3N3lOTU0NzrlWapG0ZwouREQ6uOrqar773e+yffv2Js/LzMzssKtHJDEKLkREOrho\nrkVT0tLSGFu/pLVIIxRciIh0cE3lWkQNGzaMmTNntlKLpL1TcCEi0oE556ipqW
nynK5du/LSSy91\n2GWokjgFFyIiHZiZkZmZ2eQ5ffv2pXv37q3UItkfKLgQEengxowZQ1paw28HyrWQZCi4EBHp4GbN\nmsWwYcP2CTDS0tKUayFJUXAhItLBZWVlUVRUxOTJkxk8eDDZ2dkMHjyYyZMnd+iS35I87S0iIiJk\nZWVRUFBAQUFBh94NVYKhkQsREdmLAgtpKQUXIiIiEigFFyIiIhIoBRciIiISKAUXIiIiEigFFyIi\nIhIoBRciIiISKAUXIiIiEqjQggsz62VmT5hZlZlVmtmDZtY1gevvM7M6M5sSVhtFREQkeGGOXMwH\nhgF5wJnAaGBuPBea2Q+B44CK0FonIiIioQgluDCzI4DTgYucc685514CrgbONbN+zVybDRQAE4Fd\nYbRPREREwhPWyMUooNI593rMsZWAw49INMh8zdnHgNucc6UhtU1ERERCFFZw0Q/4NPaAc64W2BJ5\nrjHTgK+cc3eH1C4REREJWULBhZndGkmybOxRa2aHJdMQM8sBpgAXJHO9iIiItA2Jbrl+OzCvmXPe\nBzYBB8YeNLN0oHfkuYZ8D+gLlMfsyJcOzDGza51z32jqRadOnUqPHj32Opafn09+fn4zzRUREdn/\nFRYWUlhYuNexqqqqUF7LnHPB39QndL4DjIzmXZjZacBzwADn3D4Bhpn1AvrXO/wCPgdjnnPuvUZe\nawRQXFxczIgRIwLshYiIyP6tpKSEnJwcgBznXElQ90105CIuzrn1ZrYceMDMrgAOAO4CCmMDCzNb\nD9zgnHvGOVcJVMbex8xqgE2NBRYiIiLS9oRZ52IisB6/SmQZ8CJwWb1zhgI9aFzwwyoiIiISqlBG\nLgCcc1uB85o5J72Z55vMsxAREZG2R3uLiIiISKAUXIiIiEigFFyIiIhIoBRciIiISKAUXIiIiEig\nFFyIiIhIoBRciIiISKAUXIiIiEigFFyIiIhIoBRciIiISKAUXIiIiEigFFyIiIhIoBRciIiISKAU\nXIiIiEigFFyIiIhIoBRciIiISKAUXIiIiEigFFyIiIhIoBRciIiISKAUXIiIiEigFFyIiIhIoBRc\niIiISKAUXIiIiEigFFyIiIhIoBRciIiISKAUXIiIiEigFFyIiIhIoBRciIiISKBCCy7MrJeZPWFm\nVWZWaWYPmlnXOK4bZmbPmNlWM/vCzF4xswFhtVNERESCFebIxXxgGJAHnAmMBuY2dYGZDQHWAe9G\nzj8auAXYGWI7RUREJEAZYdzUzI4ATgdynHOvR45dDTxrZtc75zY1culM4Fnn3C9ijn0QRhtFREQk\nHGGNXIwCKqOBRcRKwAHHNXSBmRl+hOM9M3vezD4xs5fN7D9CaqOIiIiEIKzgoh/waewB51wtsCXy\nXEMOBLoBNwDPAacCTwN/NLMTQ2qniIiIBCyh4MLMbjWzuiYetWZ2WAvbstg5d6dz7i3n3GxgGXB5\nkvcUERGRVpZozsXtwLxmznkf2IQfidjNzNKB3pHnGvIZsAsorXe8FDihuYZNnTqVHj167HUsPz+f\n/Pz85i4VERHZ7xUWFlJYWLjXsaqqqlBey5xzwd/UJ3S+A4yMSeg8DT/dMaCxhE4z+wvwD+fc+THH\n/gjscM6d18g1I4Di4uJiRowYEXBPRERE9l8lJSXk5OSAX4BREtR9Q8m5cM6tB5YDD5jZsWZ2AnAX\nUBgbWJjZ+noJm78BJpjZxWY2xMwmA2cB94TRThEREQlemHUuJgLr8atElgEvApfVO2cosHsuwzm3\nGJ9f8XPgLeBC4BznXFGI7RQREZEAhVLnAsA5txVocCoj5pz0Bo49AjwSTqtEREQkbNpbRERERAKl\n4EJEREQCpeBCREREAqXgQkRERAKl4EJEREQCpeBCREREAqXgIiBhVDoVERFpjxRctEB1dTVTI2/S\n/AAACg1JREFUpkzh0EMPZeDAgRx66KFMmT
KF6urqVDdNREQkZUIrorW/q66uZtSoUZSWllJXV7f7\n+D333MPq1aspKioiKysrhS0UERFJDY1cJGn69On7BBYAdXV1lJaWcuONN6aoZSIiIqml4CJJS5cu\n3SewiKqrq2PJkiWt3CIREZG2QcFFEpxz1NTUNHlOTU2NkjxFRKRDUnCRBDMjMzOzyXMyMzMxs1Zq\nkYiISNuh4CJJY8aMIS2t4U9fWloaY8eObeUWiYiItA0KLpI0a9Yshg0btk+AkZaWxrBhw5g5c2aK\nWiYiIpJaCi6SlJWVRVFREZMnT2bw4MFkZ2czePBgJk+erGWoIiLSoanORQtkZWVRUFBAQUEBzjnl\nWIiIiKCRi8AosBAREfEUXIiIiEigFFyIiIhIoBRciIiISKAUXIiIiEigFFyIiIhIoBRciIiISKAU\nXIiIiEigFFyIiIhIoBRciIiISKAUXLQjhYWFqW5Cq+kofVU/9y/q5/6lo/QzDKEFF2bWy8yeMLMq\nM6s0swfNrGsz13Q1s7vNrNzMdpjZO2Z2WVhtbG860jd6R+mr+rl/UT/3Lx2ln2EIc+RiPjAMyAPO\nBEYDc5u55g7gNGAicETk47vN7KwQ2ykiIiIBCiW4MLMjgNOBi5xzrznnXgKuBs41s35NXDoKeNQ5\nt845V+acexB4E/hOGO0UERGR4IU1cjEKqHTOvR5zbCXggOOauO4lYKyZHQxgZrnAUGB5SO0UERGR\ngGWEdN9+wKexB5xztWa2JfJcY64G7gc2mtkuoBa4xDn3lyau6QxQWlrasha3A1VVVZSUlKS6Ga2i\no/RV/dy/qJ/7l47Qz5j3zs6B3tg5F/cDuBWoa+JRCxwG/AIobeD6T4DLmrj/9UAp8APgKOBKYBtw\nShPXTMSPiOihhx566KGHHsk9JiYSDzT3sMgbdFzMrA/Qp5nT3gcmAbc753afa2bpwE5gnHPumQbu\n3RmoAs52zv2/mOMPANnOuR800abTgQ8j9xcREZH4dAYGA8udc58HddOEpkUiL9zsi5tZEdDTzIbH\n5F3kAQa80shlmZFHbb3jtTSRGxJp0/zm2iQiIiINeinoG4aS0OmcW49PwnzAzI41sxOAu4BC59ym\n6Hlmtt7M/iNyTTXwJ+B2MzvJzAab2U+BnwB/DKOdIiIiErywEjrB50LcjV8lUgc8BVxT75yhQI+Y\njyfg8zp+D/QGPgJ+4Zy7P8R2ioiISIASyrkQERERaY72FhEREZFAKbgQERGRQLWL4MLMrjKzD8zs\nSzN72cyObeLceWZWZ2a1kX+jj7+1ZpuTkUg/I+f/2MzeMLPtZvZPM3vIzHq3VnuTlUQ/rzKzdyOb\n2ZWa2aTWamuyzOxEM1tiZhWR77+xcVxzspkVm9lOM/u7mZ3fGm1tiUT7aWb9Ihsaboj8jM5prba2\nRBL9/KGZvWBmn0Y2b3zJzE5rrfa2RBJ9PcHM/mxmn8X8jF7bWu1NVjI/ozHXnmBmNWbW5itsJfH1\nPKnee2f0/fTARF63zQcXZjYB+L/Ar4Hh+L1GlpvZ1xu5ZAq+Cmj/yL8DgC3Ak+G3NnmJ9jOyAudR\n4AHgSGAcfg+WNp38mkQ/rwBmAf+N7+cM4B4zO7NVGpy8rsAb+EJwzSY2mdlgYBmwCvg2UAA8aGan\nhtfEQCTUT6ATvnrvLZHr2otE+zkaeAE4AxgBrAGWmtm3Q2thcBLt63b8asAT8RtO3gLMNLOLQ2th\nMBLtJwBm1gP/u3dlSO0KWjL9dPgFF/0ij/7OuU+bvqT+HQKsyBXGA3gZKIj52ICNwM/jvP5sYBcw\nMNV9CbKfwH8B79U7NhkoS3VfAu7nX4DZ9Y7dDryY6r4k0Oc6YGwz58wG3qp3rBB4LtXtD7Kf9c5f\nA8xJdbvD7mfMdW8DN6a6/a3U10X4TShT3oeg+xn5ubwJ/wdSSarbHnQ/gZPw9aW6t+S12vTIhZll\nAjn4v+
YAcL73K/Gbo8XjQmClc648+BYGI8l+FgEDzeyMyD0OAsYDz4bb2uQl2c9O7Ft5dSfwnUjV\n1/3F8ez7l9By4v8+lzbMzAzIwo+i7tfMbDj++3ZtipsSODO7ADgUH1zszwx4IzLd/oKZfTfRG7Tp\n4AL4OpCO35Mk1ic0vQEaAGbWHz8s+UDwTQtUwv10fhv784AFZvYV8DFQiR+9aKuS+XouBy42sxEA\nZjYSuAhfzbWxqbH2qB8Nf166m1mnFLRHgvUz/PB0m56ebQkzKzezncCrwD3OuXmpblOQzGwo8D/A\nj51zdaluT4g+Bi4DfgScA5QDa83smERuEmYRrbbgp/g33H32MmnvzOxI/Lz8DPzcbn/8dMFcoK3P\ndSbiFuAgoMjM0oBNwCPAz/FDfCJtmplNBH6FH47+LNXtCdH3gG74UbjZZvYP59yCFLcpEJHfPU8A\nv3bO/W/0cAqbFBrn3N+Bv8ccetnMhgBTgbiTzNv6yMVn+Lmfg+odPwj/JtOcC4DHnHO7gm5YwJLp\n5zTgL865Oc65t51zK/AJOxdGpkjaooT76Zzb6Zy7GOgCHAIMwldurXbObQ6xra1tEw1/XrY55/6V\ngvZIAMzsXHyS9Xjn3JpUtydMzrmPnHPvOOceAu7A/+Gzv8gCRgJ3R1aJ1OADxmPM7CszOzmlrQvf\nq8A3E7mgTQcXzrkaoBi/6Rmwe+4yj2Y2Wol8sYcAD4XYxEAk2c8u+ETVWHX4LN82GVG35OvpnKt1\nzv0zkqNxLrA0zLamQBExn5eI0yLHpR0ys3z8759znXPPp7o9rSwdny+1v9gGHAUcg1/N9W3gPmB9\n5P+Nbci5vzgGP10St/YwLTIHeMTMivHR01T8G+sjAGZ2K3Cwc67+cM1FwCvOudJWbGtLJNrPpcD9\nZnY5Pi/hYPxfC6+4mM3h2qCE+hmZ5/wO/oe3N3Ad8C38hnZtlpl1xUf60UDvG5FliFucc+UNfD3v\nA64ys9nAw/hAYxzwg1ZuekKS6CeR5w0/hN438vFXbflnNdF+RqZCHsEvjf9rzGjil865ba3b+sQk\n0dcrgTL8Gy341Qb/Bfy2dVuemET6Gfmj5t16138K7GzL37eQ1NfzGuAD4B38duyXALlAYsviU700\nJs7lM1cCHwJf4v+SGxnz3Dxgdb3zuwNfABemuu0h9/Mq4G+Rvm7Er73un+p+BNlP/Lr5kkgfK/E7\n5A5NdR/i6ONJ+JGk2nqPh5v4eo7Gj+x8CbwHTEp1P0LqZ0Pnv5/qvgTZT/wy2/rn7j6/LT+S6Ovk\nyO+h6sjP6GvApanuR9D9bOD6drEUNYmv588iv3+2A5vxq/tGJ/q62rhMREREAtWmcy5ERESk/VFw\nISIiIoFScCEiIiKBUnAhIiIigVJwISIiIoFScCEiIiKBUnAhIiIigVJwISIiIoFScCEiIiKBUnAh\nIiIigVJwISIiIoH6//SEypfIyYt0AAAAAElFTkSuQmCC\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FilenameEINProgram_ExpLiabilities_To_AssetWorking_CapitalSurplus_MarginTotal_Expenses
2035201502969349300235.xml680383921.00.0000000.000000172324.3000000.00000010.0
9921201513179349308861.xml311040228.00.0000000.540167149559.4700001.000333100.0
12048201513179349305901.xml166050703.01.00000033.472201-148963.5636000.000000220.0
18753201513139349303196.xml943152652.01.000000254.451508-7.144765-250550.000000250551.0
30433201512599349300126.xml232799695.00.9858250.4268222.387819-54849.571430202757.0
\n", + "
" + ], + "text/plain": [ + " Filename EIN Program_Exp Liabilities_To_Asset \\\n", + "2035 201502969349300235.xml 680383921.0 0.000000 0.000000 \n", + "9921 201513179349308861.xml 311040228.0 0.000000 0.540167 \n", + "12048 201513179349305901.xml 166050703.0 1.000000 33.472201 \n", + "18753 201513139349303196.xml 943152652.0 1.000000 254.451508 \n", + "30433 201512599349300126.xml 232799695.0 0.985825 0.426822 \n", + "\n", + " Working_Capital Surplus_Margin Total_Expenses \n", + "2035 172324.300000 0.000000 10.0 \n", + "9921 149559.470000 1.000333 100.0 \n", + "12048 -148963.563600 0.000000 220.0 \n", + "18753 -7.144765 -250550.000000 250551.0 \n", + "30433 2.387819 -54849.571430 202757.0 " + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(n_clusters_)\n", + "\n", + "outliers_index = []\n", + "for i in range(len(labels)):\n", + " if(labels[i]==-1):\n", + " outliers_index.append(i)\n", + " #print(df2.loc[i])\n", + "outliers = df2.loc[outliers_index]\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Black removed and is used for noise instead.\n", + "unique_labels = set(labels)\n", + "colors = 'cyan'\n", + "for k, col in zip(unique_labels, colors):\n", + " if k == -1:\n", + " # Black used for noise.\n", + " col = 'k'\n", + "\n", + " class_member_mask = (labels == k)\n", + "\n", + " xy = X[class_member_mask & core_samples_mask]\n", + " plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,\n", + " markeredgecolor='k', markersize=14)\n", + "\n", + " xy = X[class_member_mask & ~core_samples_mask]\n", + " plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,\n", + " markeredgecolor='k', markersize=6)\n", + "\n", + "plt.title('Estimated number of clusters: %d' % n_clusters_)\n", + "plt.show()\n", + "\n", + "outliers\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Results: \n", + "\n", + "Outliers appear to be failing in different metrics. 
The first two fail at Program Expenses and have extremely inflated Working Capitals.\n", + "The last two, despite having positive program expenses, have very high and negative surplus margins and are thus losing a lot of money." + ] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Python [conda root]", + "language": "python", + "name": "conda-root-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/Kmeans.ipynb b/Kmeans.ipynb new file mode 100644 index 0000000..8709d08 --- /dev/null +++ b/Kmeans.ipynb @@ -0,0 +1,1323 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## KMeans\n", + "\n", + "Initialized cluster centers, assigning points to each cluster and repeating this process trying to minimize Euclidean distance from points to cluster centers with each iteration.\n", + "\n", + "Outliers were calculated as being the point that was the furthest Euclidean distance away from its cluster center" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FilenameEINProgram_ExpLiabilities_To_AssetWorking_CapitalSurplus_MarginTotal_Expenses
0201523229349300327.xml510311790.00.9896190.0918021.574677-0.078663464318.0
1201543089349301829.xml261460932.00.9653780.0000003.910347-0.04267427439.0
2201533179349306298.xml270609504.00.9422760.0492060.6551520.088597384828.0
3201533209349304768.xml521548962.00.3845260.00081430.3927560.63312650912.0
4201533179349307343.xml731653383.00.6205600.0294871.0837390.10590128123.0
5201533189349300608.xml237324566.00.7589020.736982-0.176215-0.003914608126.0
6201523069349301367.xml43259150.00.7363120.1819980.3785920.057534212968.0
7201533069349300963.xml621273871.00.6448220.23424714.4131630.4961221426413.0
8201523099349300542.xml541897455.00.8456590.1385310.414986-0.016769461512.0
9201533099349301033.xml251869168.00.7623570.0000008.6739840.122244102044.0
10201523169349304367.xml376046335.00.7453860.00000016.4639760.25257937170.0
11201533099349301803.xml50454409.00.9152610.0000001.1587680.062649557347.0
12201523069349300142.xml272364809.00.9203070.0036060.240597-0.029663302061.0
13201543039349301204.xml463812139.00.6640880.6484130.0458230.088812139974.0
14201523089349301462.xml561797737.00.7046230.4326710.000000-0.050384214773.0
15201533069349300788.xml590638867.00.0000000.0000000.360157-0.055883762522.0
16201533079349300238.xml273314429.01.0000000.0000000.0000000.10758426171.0
17201523209349314257.xml710801566.01.0000000.0000000.000000-8.2123371145333.0
18201523209349311332.xml752204594.00.8968950.0697150.535678-0.0641003889691.0
19201533179349302173.xml251374594.00.0000000.3136300.3375370.014809423512.0
20201533179349307048.xml450448773.00.9912050.0513480.4149300.161552185894.0
21201533209349302633.xml363789851.00.0000000.2320110.521133-0.1289977329143.0
22201533099349301113.xml411712804.00.4776970.0098870.751388-0.014507814720.0
23201523039349300127.xml381557861.00.8321590.0638342.9881200.1709111369609.0
24201523079349301652.xml273741627.00.9451200.0000000.8636630.0949851234798.0
25201533039349300813.xml411495321.00.6681610.040796144.508488-0.1397771108143.0
26201533139349300208.xml60666277.00.0000000.6449040.654848-0.29614319649432.0
27201533069349301413.xml42616064.00.9512180.3386830.1785170.04112333254203.0
28201533079349300003.xml581651220.00.9103470.0268320.945357-0.020293324797.0
29201523069349300957.xml341496171.00.8506170.0688381.1257120.212899672775.0
........................
75744201542929349301039.xml205158717.00.4905450.0000000.4963500.00000052195.0
75745201542929349301104.xml440296401.00.0000000.0012552.673444-0.0754043921954.0
75746201542929349301204.xml131084330.00.0000000.9499500.043382-0.07956349480300.0
75747201503109349302210.xml30228267.00.7268960.0484910.4849470.0592591275474.0
75748201503109349302270.xml741718905.00.0000000.4443490.536678-0.021724700826.0
75749201522549349300117.xml150249365.00.0000000.0000002.6042911.82803554487.0
75750201522549349300127.xml942763918.00.0000000.1268736.226369-0.16031861868.0
75751201522549349300142.xml453774366.00.0000000.0068781.2217410.337512387277.0
75752201522549349300202.xml860507921.00.7510380.2561691.011925-0.0459283732567.0
75753201522549349300217.xml236390816.00.0000000.12808519.697597-2.047494227081.0
75754201522549349300247.xml940689854.01.0000000.5621910.151220-0.047202291609.0
75755201522549349300317.xml830164620.00.0000000.0090785.4866830.127495188778.0
75756201522549349300322.xml274726130.00.9915140.0037921.6514610.3928731796484.0
75757201503139349301280.xml810677279.00.5646010.0166954.3389490.52988885951.0
75758201503139349301285.xml232090256.00.7677431.176003-0.257737-0.06542516050928.0
75759201503139349301295.xml202902396.00.0099010.0041411704.0891090.979933808.0
75760201503139349301300.xml222471909.00.9408471.457766-1.7937190.013922469665.0
75761201503139349301315.xml50359008.00.6341990.0067123.5537220.199138179469.0
75762201503139349301335.xml20530732.00.7964640.8742270.345519-0.024264691125.0
75763201503139349301400.xml60668594.00.8589080.1833480.809719-0.11480228331141.0
75764201503139349301405.xml570884504.00.9607110.2026144.412177-0.36056057650.0
75765201513159349303976.xml911075950.00.9127920.1128526.9786760.0233831188586.0
75766201513159349303991.xml450537391.00.8828611.0000000.0000000.00000033214334.0
75767201513159349304006.xml630985623.01.0000000.1641631.3504700.070756201749.0
75768201513159349304021.xml930854620.00.2212010.00012114.6056220.07542662260.0
75769201513159349304046.xml263218152.00.6618380.1630340.730298-0.017285508851.0
75770201513159349304051.xml520887806.00.8192480.4175530.5373070.0287305894235.0
75771201513159349304061.xml942608741.00.8937040.0099071.6385780.044649243668.0
75772201513159349304071.xml330841281.00.8078120.3422690.8360170.29750525594615.0
75773201513159349304076.xml570751500.00.8877860.4155708.5715010.0634481610096.0
\n", + "

75774 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " Filename EIN Program_Exp Liabilities_To_Asset \\\n", + "0 201523229349300327.xml 510311790.0 0.989619 0.091802 \n", + "1 201543089349301829.xml 261460932.0 0.965378 0.000000 \n", + "2 201533179349306298.xml 270609504.0 0.942276 0.049206 \n", + "3 201533209349304768.xml 521548962.0 0.384526 0.000814 \n", + "4 201533179349307343.xml 731653383.0 0.620560 0.029487 \n", + "5 201533189349300608.xml 237324566.0 0.758902 0.736982 \n", + "6 201523069349301367.xml 43259150.0 0.736312 0.181998 \n", + "7 201533069349300963.xml 621273871.0 0.644822 0.234247 \n", + "8 201523099349300542.xml 541897455.0 0.845659 0.138531 \n", + "9 201533099349301033.xml 251869168.0 0.762357 0.000000 \n", + "10 201523169349304367.xml 376046335.0 0.745386 0.000000 \n", + "11 201533099349301803.xml 50454409.0 0.915261 0.000000 \n", + "12 201523069349300142.xml 272364809.0 0.920307 0.003606 \n", + "13 201543039349301204.xml 463812139.0 0.664088 0.648413 \n", + "14 201523089349301462.xml 561797737.0 0.704623 0.432671 \n", + "15 201533069349300788.xml 590638867.0 0.000000 0.000000 \n", + "16 201533079349300238.xml 273314429.0 1.000000 0.000000 \n", + "17 201523209349314257.xml 710801566.0 1.000000 0.000000 \n", + "18 201523209349311332.xml 752204594.0 0.896895 0.069715 \n", + "19 201533179349302173.xml 251374594.0 0.000000 0.313630 \n", + "20 201533179349307048.xml 450448773.0 0.991205 0.051348 \n", + "21 201533209349302633.xml 363789851.0 0.000000 0.232011 \n", + "22 201533099349301113.xml 411712804.0 0.477697 0.009887 \n", + "23 201523039349300127.xml 381557861.0 0.832159 0.063834 \n", + "24 201523079349301652.xml 273741627.0 0.945120 0.000000 \n", + "25 201533039349300813.xml 411495321.0 0.668161 0.040796 \n", + "26 201533139349300208.xml 60666277.0 0.000000 0.644904 \n", + "27 201533069349301413.xml 42616064.0 0.951218 0.338683 \n", + "28 201533079349300003.xml 581651220.0 0.910347 0.026832 \n", + "29 201523069349300957.xml 341496171.0 0.850617 0.068838 \n", + "... ... 
... ... ... \n", + "75744 201542929349301039.xml 205158717.0 0.490545 0.000000 \n", + "75745 201542929349301104.xml 440296401.0 0.000000 0.001255 \n", + "75746 201542929349301204.xml 131084330.0 0.000000 0.949950 \n", + "75747 201503109349302210.xml 30228267.0 0.726896 0.048491 \n", + "75748 201503109349302270.xml 741718905.0 0.000000 0.444349 \n", + "75749 201522549349300117.xml 150249365.0 0.000000 0.000000 \n", + "75750 201522549349300127.xml 942763918.0 0.000000 0.126873 \n", + "75751 201522549349300142.xml 453774366.0 0.000000 0.006878 \n", + "75752 201522549349300202.xml 860507921.0 0.751038 0.256169 \n", + "75753 201522549349300217.xml 236390816.0 0.000000 0.128085 \n", + "75754 201522549349300247.xml 940689854.0 1.000000 0.562191 \n", + "75755 201522549349300317.xml 830164620.0 0.000000 0.009078 \n", + "75756 201522549349300322.xml 274726130.0 0.991514 0.003792 \n", + "75757 201503139349301280.xml 810677279.0 0.564601 0.016695 \n", + "75758 201503139349301285.xml 232090256.0 0.767743 1.176003 \n", + "75759 201503139349301295.xml 202902396.0 0.009901 0.004141 \n", + "75760 201503139349301300.xml 222471909.0 0.940847 1.457766 \n", + "75761 201503139349301315.xml 50359008.0 0.634199 0.006712 \n", + "75762 201503139349301335.xml 20530732.0 0.796464 0.874227 \n", + "75763 201503139349301400.xml 60668594.0 0.858908 0.183348 \n", + "75764 201503139349301405.xml 570884504.0 0.960711 0.202614 \n", + "75765 201513159349303976.xml 911075950.0 0.912792 0.112852 \n", + "75766 201513159349303991.xml 450537391.0 0.882861 1.000000 \n", + "75767 201513159349304006.xml 630985623.0 1.000000 0.164163 \n", + "75768 201513159349304021.xml 930854620.0 0.221201 0.000121 \n", + "75769 201513159349304046.xml 263218152.0 0.661838 0.163034 \n", + "75770 201513159349304051.xml 520887806.0 0.819248 0.417553 \n", + "75771 201513159349304061.xml 942608741.0 0.893704 0.009907 \n", + "75772 201513159349304071.xml 330841281.0 0.807812 0.342269 \n", + "75773 201513159349304076.xml 570751500.0 
0.887786 0.415570 \n", + "\n", + " Working_Capital Surplus_Margin Total_Expenses \n", + "0 1.574677 -0.078663 464318.0 \n", + "1 3.910347 -0.042674 27439.0 \n", + "2 0.655152 0.088597 384828.0 \n", + "3 30.392756 0.633126 50912.0 \n", + "4 1.083739 0.105901 28123.0 \n", + "5 -0.176215 -0.003914 608126.0 \n", + "6 0.378592 0.057534 212968.0 \n", + "7 14.413163 0.496122 1426413.0 \n", + "8 0.414986 -0.016769 461512.0 \n", + "9 8.673984 0.122244 102044.0 \n", + "10 16.463976 0.252579 37170.0 \n", + "11 1.158768 0.062649 557347.0 \n", + "12 0.240597 -0.029663 302061.0 \n", + "13 0.045823 0.088812 139974.0 \n", + "14 0.000000 -0.050384 214773.0 \n", + "15 0.360157 -0.055883 762522.0 \n", + "16 0.000000 0.107584 26171.0 \n", + "17 0.000000 -8.212337 1145333.0 \n", + "18 0.535678 -0.064100 3889691.0 \n", + "19 0.337537 0.014809 423512.0 \n", + "20 0.414930 0.161552 185894.0 \n", + "21 0.521133 -0.128997 7329143.0 \n", + "22 0.751388 -0.014507 814720.0 \n", + "23 2.988120 0.170911 1369609.0 \n", + "24 0.863663 0.094985 1234798.0 \n", + "25 144.508488 -0.139777 1108143.0 \n", + "26 0.654848 -0.296143 19649432.0 \n", + "27 0.178517 0.041123 33254203.0 \n", + "28 0.945357 -0.020293 324797.0 \n", + "29 1.125712 0.212899 672775.0 \n", + "... ... ... ... 
\n", + "75744 0.496350 0.000000 52195.0 \n", + "75745 2.673444 -0.075404 3921954.0 \n", + "75746 0.043382 -0.079563 49480300.0 \n", + "75747 0.484947 0.059259 1275474.0 \n", + "75748 0.536678 -0.021724 700826.0 \n", + "75749 2.604291 1.828035 54487.0 \n", + "75750 6.226369 -0.160318 61868.0 \n", + "75751 1.221741 0.337512 387277.0 \n", + "75752 1.011925 -0.045928 3732567.0 \n", + "75753 19.697597 -2.047494 227081.0 \n", + "75754 0.151220 -0.047202 291609.0 \n", + "75755 5.486683 0.127495 188778.0 \n", + "75756 1.651461 0.392873 1796484.0 \n", + "75757 4.338949 0.529888 85951.0 \n", + "75758 -0.257737 -0.065425 16050928.0 \n", + "75759 1704.089109 0.979933 808.0 \n", + "75760 -1.793719 0.013922 469665.0 \n", + "75761 3.553722 0.199138 179469.0 \n", + "75762 0.345519 -0.024264 691125.0 \n", + "75763 0.809719 -0.114802 28331141.0 \n", + "75764 4.412177 -0.360560 57650.0 \n", + "75765 6.978676 0.023383 1188586.0 \n", + "75766 0.000000 0.000000 33214334.0 \n", + "75767 1.350470 0.070756 201749.0 \n", + "75768 14.605622 0.075426 62260.0 \n", + "75769 0.730298 -0.017285 508851.0 \n", + "75770 0.537307 0.028730 5894235.0 \n", + "75771 1.638578 0.044649 243668.0 \n", + "75772 0.836017 0.297505 25594615.0 \n", + "75773 8.571501 0.063448 1610096.0 \n", + "\n", + "[75774 rows x 7 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "from sklearn.cluster import KMeans\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "from sklearn.cluster import KMeans\n", + "import \tnumpy as np\n", + "import re\n", + "import sys\n", + "import csv\n", + "from sklearn import preprocessing\n", + "import sklearn.metrics as met\n", + "from sklearn.decomposition import PCA\n", + "import matplotlib.cm as cm\n", + "\n", + "\n", + "df1=pd.read_csv('team_out_1.csv')\n", + "df2=pd.read_csv('team_out_a2.csv')\n", + "df3=pd.read_csv('team_out_a3.csv')\n", + 
"df4=pd.read_csv('team_out_Yash.csv')\n", + "df5=pd.read_csv('team_out_Yash_part1.csv')\n", + "\n", + "df=df1.append(df2)\n", + "df=df.append(df3)\n", + "df=df.append(df4)\n", + "df=df.append(df5)\n", + "\n", + "\n", + "df.dropna(inplace=True)\n", + "df.reset_index(inplace=True,drop=True)\n", + "# df=df[df.Total_Expenses>0]\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAhEAAAGBCAYAAADYEOPMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3Xl8lOW5//HPBQgIaHBBKApaRRAPik0U8ChQwUIh40Lr\nFq1rbWttraU9p2pda39tPXW3Fj09dbfGY7UeAaGiYItSlZooqIDgCuICuEQE2ZLr98c9KZOYhGQy\nyT3L9/16PS8yz8w8cz0ozJd7NXdHREREpKU6xC5AREREcpNChIiIiKRFIUJERETSohAhIiIiaVGI\nEBERkbQoRIiIiEhaFCJEREQkLQoRIiIikhaFCBEREUmLQoSIiIikJadDhJmNNLOpZrbSzGrM7OgW\nvn+0mf2fmb1rZp+Z2QtmdnIDr/uqmVWY2QYzW2pmp2fuLkRERHJTTocIoDvwInAukM4mIP8OLAC+\nARwA3AHcbWYTa19gZnsB04HZwFDgRuCPZva11hQuIiKS6yxfNuAysxrgWHefmnKuM/Br4CSgJ/AS\ncKG7/72J60wH3nf3s5OP/wuY4O4HprymHChy94mNXEZERCTv5XpLxLb8HhgOnEBoafgzMNPM9mni\nPUXARymPRwBP1HvNY8ChGaxTREQk5+RtiDCzfsAZwPHu/g93f9PdrwPmAWc28p4TgIMJ3Rq1+gAf\n1HvpB8COZtYl44WLiIjkiE6xC2hDBwAdgaVmZinnOwNr6r/YzI4AbgfOdvfF7VOiiIhI7srnENED\n2AIUAzX1nvss9YGZjQamAue7+5/qvfZ9oHe9c72BT919Y+bKFRERyS35HCJeILRE9Hb3eY29yMy+\nCkwD/tPdb2vgJc8AE+qdG5c8LyIiUrCij4kwsw5m9ksze8PM1pvZa2Z2STPf293MhprZQclTeycf\n93P3ZcB9hCmbk8xsLzMbZmYXmtmE5PuPIEzfvBF42Mx6J4+dUj7m1uR1/8vMBpnZucBxwHWZ+j0Q\nERHJRdGneJrZz4EfA6cBiwgDG+8Efu7uN2/jvaOBJ/niGhF3uftZZtYRuCR57d0JYyGeBS5391fM\n7I7kc/X93d3HpHzOKOB6YH/gHeBKd7+npfcqIiKST7IhREwjrMvwnZRzDwLr3b2hL3gRERHJAtG7\nM4B/AGPNbF8AMxsKHAbMiFqViIiINCkbBlZeBewILDGzakKwudjd749bloiIiDQlG0LEicDJhKWp\nFwEHATea2bsNjTsws12A8cBbwIZ2rFNERCTXdQX2Ah5z9w9be7FsGBOxHPiNu9+Scu5i4BR337+B\n158M1F/LQURERJrvFHe/r7UXyYaWiG5Adb1zNTQ+XuMtgHvvvZfBgwe3YVntZ/LkyVx//fWxy8iY\nfLqffLoX0P1ks3y6F9D9ZKvFixfzrW99C5Lfpa2VDSFiGnCJmb0DvEJYYXIy8MdGXr8BYPDgwRQX\nF7dPhW2sqKgob+4F8ut+8uleQPeTzfLp
XkD3kwMyMhwgG0LED4FfEnbc3A14F7gleU5ERESyVPQQ\n4e7rgJ8kDxEREckR2bBORFo+bPWYUhEREWmNnA0R8xrdUiv3lJWVxS4ho/LpfvLpXkD3k83y6V5A\n91Mook/xbCkzKwYqxoypYPbsvBrkIiIi0qYqKyspKSkBKHH3ytZeL2dbIp59FjZtil2FiIhI4crZ\nELF+PTz1VOwqREREClfOhohevWD69NhViIiIFK6cDREjR8K0aZBjQzpERETyRk6HiNdfh6VLY1ci\nIiJSmHI2RBxyCHTtqi4NERGRWHI2RGy/PYwZoxAhIiISS86GCIBEAp5+Gj75JHYlIiIihSenQ0Rp\nKWzZArNmxa5ERESk8OR0iOjfHw48UF0aIiIiMeR0iIDQpTFjBlRXx65ERESksORFiPjwQ3juudiV\niIiIFJacDxHDhsGuu6pLQ0REpL3lfIjo2BEmTIBHH41diYiISGHJ+RABoUtj4UJYvjx2JSIiIoUj\nL0LEuHHQqZNaI0RERNpTXoSInj3DXhoaFyEiItJ+8iJEQOjSmD0b1q2LXYmIiEhhyKsQsXEjzJkT\nuxIREZHCkDchYuBAGDBA4yJERETaS96ECAitEdOng3vsSkRERPJf3oWIlSthwYLYlYiIiOS/vAoR\nI0fCDjtoloaIiEh7yKsQ0bkzjB+vECEiItIe8ipEQOjSmD8fPvggdiUiIiL5LXqIMLM3zaymgeN3\n6VxvwoTw68yZmaxSRERE6oseIoCDgT4px9cABx5I52K77RZ29tRUTxERkbYVPUS4+4fuvqr2AI4C\nXnf3p9K9ZiIBjz0GmzZlrk4RERGpK3qISGVm2wGnALe15jqJBKxdC0+lHUNERERkW7IqRACTgCLg\nrtZcZOhQ2H13zdIQERFpS51iF1DPWcBMd39/Wy+cPHkyRUVFdc6VlZVRVlaGWWiNmDYNrrsOzNqq\nXBERkexUXl5OeXl5nXNVVVUZ/QzzLFkj2sz6A28Ax7p7o20IZlYMVFRUVFBcXNzo9aZNg6OPhiVL\nYNCgzNcrIiKSayorKykpKQEocffK1l4vm7ozzgI+AGZk4mJjx0LXrpqlISIi0layIkSYmQFnAHe6\ne00mrtmtG4wZo3ERIiIibSUrQgRwJNAPuCOTF00kwgyNTz7J5FVFREQEsiREuPvj7t7R3V/L5HVL\nS2HLFpg1K5NXFREREciSENFW+veHAw9Ul4aIiEhbyOsQAaFLY8YMqK6OXYmIiEh+yfsQUVoKH34Y\ndvYUERGRzMn7EDF8OOyyi7o0REREMi3vQ0THjjBxokKEiIhIpuV9iIAwLmLhQli+PHYlIiIi+aMg\nQsS4cdCpk1avFBERyaSCCBE9e8LIkerSEBERyaSCCBEQZmnMmQPr18euREREJD8UTIhIJGDDhhAk\nREREpPUKJkQMHAgDBqhLQ0REJFMKJkSYhdaI6dPBPXY1IiIiua9gQgSEELFyJSxYELsSERGR3FdQ\nIWLkSNhhB3VpiIiIZEJBhYjOncOaEQoRIiIirVdQIQJCl8b8+bBqVexKREREclvBhYgJE8KvM2fG\nrUNERCTXFVyI6N0bhg1Tl4aIiEhrFVyIgNCl8dhjsGlT7EpERERyV8GGiLVr4amnYlciIiKSuwoy\nRAwdCrvvri4NERGR1ijIEGEWNuTS1uAiIiLpK8gQAaFLY9kyWLo0diUiIiK5qWBDxNix0LWrujRE\nRETSVbAhols3GDNGIUJERCRdBRsiIHRpPPUUfPJJ7EpERERyT0GHiNJS2LIFZs2KXYmIiEjuKegQ\n0b8/HHCAZmmIiIikIytChJn1NbN7zGyNma03swVmVtwen51IwIwZUF3dHp8mIiKSP6KHCDPrCcwD\nNgLjgcHAT4GP2+PzEwlYsybs7CkiIiLN1yl2AcCFwHJ3Pzvl3Nvt9eHDh8Muu4RZGoce2l6fKiIi\nkvui
t0QARwHPm9kDZvaBmVWa2dnbfFeGdOwIEydqqqeIiEhLZUOI2Bv4PvAqMA64BbjJzE5trwIS\nCVi4EJYvb69PFBERyX3Z0J3RAZjv7pcmHy8wsyHAOcA9jb1p8uTJFBUV1TlXVlZGWVlZiwsYNy60\nSDz6KHz/+y1+u4iISNYpLy+nvLy8zrmqqqqMfoa5e0Yv2OICzN4CZrn7d1POnQNc7O79Gnh9MVBR\nUVFBcXHmJnAccQR0765uDRERyV+VlZWUlJQAlLh7ZWuvlw3dGfOAQfXODaIdB1dC6NKYPRvWr2/P\nTxUREcld2RAirgdGmNlFZraPmZ0MnA3c3J5FJBKwYQPMmdOenyoiIpK7oocId38emASUAS8BFwPn\nu/v97VnHwIEwYIC6M0RERJorGwZW4u4zgBkxazALrRF//jO4h8ciIiLSuOgtEdmktBRWroQFC2JX\nIiIikv0UIlKMGgU9emhDLhERkeZQiEjRuTOMH69xESIiIs2hEFFPIgHPPQerVsWuREREJLspRNQz\nYUL4debMuHWIiIhkO4WIenr3hmHD1KUhIiKyLQoRDSgthcceg02bYlciIiKSvRQiGpBIwNq18PTT\nsSsRERHJXgoRDTjoIOjbV10aIiIiTVGIaEDt6pUKESIiIo1TiGhEIgHLlsHSpbErERERyU4KEY0Y\nOxa6dlVrhIiISGMUIhrRrRuMGaMQISIi0hiFiCaUlsJTT0FVVexKREREso9CRBNKS2HLFpg1K3Yl\nIiIi2Uchogl77gkHHKAuDRERkYYoRGxDIgEzZkB1dexKREREsotCxDYkErBmDcyfH7sSERGR7KIQ\nsQ3Dh8Muu6hLQ0REpD6FiG3o2DFsD64QISIiUpdCRDMkErBwIaxYEbsSERGR7KEQ0Qzjx4cWiUcf\njV2JiIhI9lCIaIaePWHkSHVpiIiIpFKIaKZEAmbPhvXrY1ciIiKSHRQimimRgA0bYM6c2JWIiIhk\nB4WIZho4EPbZR10aIiIitRQimskstEY8+ii4x65GREQkvughwswuN7Oaesei2HU1JJGAd94J0z1F\nREQKXfQQkfQy0BvokzwOj1tOw0aNgh491KUhIiIC2RMitrj7andflTw+il1QQzp3DmtGKESIiIhk\nT4jY18xWmtnrZnavmfWLXVBjEgl47jlYtSp2JSIiInFlQ4h4FjgDGA+cA3wZmGtm3WMW1ZgJE8LA\nypkzY1ciIiISV/QQ4e6PuftD7v6yuz8OTAR2Ak6IXFqDeveGYcO0BLaIiEin2AXU5+5VZrYUGNDU\n6yZPnkxRUVGdc2VlZZSVlbVleUDo0rjmGti0KYyTEBERyTbl5eWUl5fXOVdVVZXRzzDPskUPzKwH\nsBy4zN1vbuD5YqCioqKC4uLidq8P4IUXoLg4LIM9ZkyUEkRERFqssrKSkpISgBJ3r2zt9aJ3Z5jZ\n1WY2ysz2NLN/Bx4GNgPl23hrNAcdBH37apaGiIgUtughAtgDuA9YAtwPrAZGuPuHUatqQu3qlQoR\nIiJSyKKHCHcvc/c93H17d+/v7ie7+5ux69qWRAKWLYOlS2NXIiIiEkf0EJGrxoyBLl00S0NERAqX\nQkSauncPQUJdGiIiUqgUIlohkYC5cyHDM2ZERERygkJEK5SWwpYtMGtW7EpERETan0JEK+y5Jxxw\ngLo0RESkMClEtFIiATNmQHV17EpERETal0JEK5WWwpo1MH9+7EpERETal0JEK40YATvvrKmeIiJS\neBQiWqljR5g4UeMiRESk8ChEZEAiAQsWwIoVsSsRERFpPwoRGTB+fGiRUJeGiIgUEoWIDOjZE0aO\nVJeGiIgUFoWIDCkthdmzYf362JWIiIi0D4WIDEkkYMMGePLJ2JWIiIi0D4WIDBk0CPbZR10aIiJS\nOBQiMsQstEZMnw7usasRERFpewoRGZRIwDvvwMKFsSsRERFpewoRGT
RqFPTooS4NEREpDAoRGdS5\nM4wbpxAhIiKFocUhwsw6mdllZrZHWxSU6xIJeO45WL06diUiIiJtq8Uhwt23AP8JdMp8Oblv4sQw\nsHLmzNiViIiItK10uzPmAKMzWUi+6N0bhg1Tl4aIiOS/dFsTZgJXmdkBQAWwLvVJd5/a2sJyWSIB\n11wDmzaFcRIiIiL5KN0QMSX5608aeM6BjmleNy8kEnDZZfD00zBmTOxqRERE2kZa3Rnu3qGJo6AD\nBMBBB0HfvurSEBGR/KYpnm3ALGzIpa3BRUQkn6UdIsxstJlNM7PXksdUMxuZyeJyWSIBS5eGQ0RE\nJB+lFSLM7FvAE8B64Kbk8Tkw28xOzlx5uWvsWOjSRa0RIiKSv9JtibgY+Jm7n+juNyWPE4ELgUtb\nU5CZXWhmNWZ2XWuuE1v37mFQpcZFiIhIvko3ROwNTGvg/FTgy+kWY2aHAN8FFqR7jWySSMDcuVBV\nFbsSERGRzEs3RKwAxjZw/sjkcy1mZj2Ae4GzgU/SrCurlJbCli0wa1bsSkRERDIv3RBxLXCTmd1i\nZqcmj1uBG4Br0rzm74Fp7j4nzfdnnT33hCFD1KUhIiL5Ka3Fptz9FjN7H/gpcELy9GLgRHd/pKXX\nM7OTgIOAg9OpJ5slEnDbbVBdDR0LfgUNERHJJ+ns4tnRzEYBT7r74e6+S/I4PM0AsQehBeMUd9/c\n0vdnu0Qi7Oj5z3/GrkRERCSzWtwS4e7VZjYLGExmxi6UAL2ASjOz5LmOwCgz+yHQxd29/psmT55M\nUVFRnXNlZWWUlZVloKTMGTECdt45dGmMGBG7GhERKRTl5eWUl5fXOVeV4ZH+1sD387bfZPY8cIG7\nz251AWbdgT3rnb6T0D1ylbsvrvf6YqCioqKC4uLi1n58uzj1VHjpJXjxxdiViIhIIausrKSkpASg\nxN0rW3u9dAdWXgJcY2YJM/uSme2YerTkQu6+zt0XpR6EXUE/rB8gclUiAQsWwIq05q2IiIhkp3RD\nxAxgKGFdiHeAj5PHJ8lfW6vlzSNZbPz4MKhSq1eKiEg+SXcr8CMyWkU97p5XG2j37AmHHx5CxDnn\nxK5GREQkM1ocIsysEzAauN3d38l8SfkpkYBLL4X166Fbt9jViIiItF6LuzPcfQvwn6TfilGQEgnY\nsAGefDJ2JSIiIpmR7piIOYTWCGmmQYNgn320eqWIiOSPdFsTZgJXmdkBQAVhNsW/uPvU1haWb8xC\na8RDD8GUKeGxiIhILks3RExJ/vqTBp5zwmJRUk9pKdx4IyxcCEOHxq5GRESkddLqznD3Dk0cChCN\nGDUKevTQVE8REckPLQoRZjbDzIpSHl9oZj1THu9iZosyWWA+6dIFxo3TuAgREckPLW2JGA90SXn8\nc2DnlMedgEGtLSqfJRLw7LNhUy4REZFc1tIQUX84oIYHttDEieAOM2fGrkRERKR10p3iKWnq3RuG\nDVOXhoiI5L6Whgjni/ta5NU+F+2htBQeeww2b45diYiISPpaOsXTgDvNbGPycVfgVjOrXSeiS8Nv\nk1SJBFx+OTz9NBzRpruQiIiItJ2WtkTcBawCqpLHvcC7KY9XAXdnssB89JWvQN++6tIQEZHc1qKW\nCHc/s60KKSRmoUtj+nS49trY1YiIiKRHAysjSSRg6dJwiIiI5CKFiEjGjg2LT2n1ShERyVUKEZF0\n7w5jxmhchIiI5C6FiIhKS2HuXKiqil2JiIhIyylERFRaClu2wOOPx65ERESk5RQiItprLxgyBB54\nIHYlIiIiLacQEdmPfgR//jPcfnvsSkRERFqmpStWSoadfTb8859wzjkwaBAcdljsikRERJpHLRGR\nmcHNN8OIEfCNb8Dy5bErEhERaR6FiCzQuTM89BBsvz0ccwysW7ft94iIiMSmEJElevWCqVNh2TI4\n4wxw7Y0qIiJZTiEiixx4INxzDz
z4IPzyl7GrERERaZpCRJaZNAmuvDJsFf7QQ7GrERERaZxmZ2Sh\nSy6Bl1+G006DAQNg6NDYFYmIiHxR9JYIMzvHzBaYWVXy+IeZfT12XTGZwR13hCmfRx8Nq1bFrkhE\nROSLoocIYAVwAVAMlABzgEfMbHDUqiLr1g0eeQQ2boRvfhM2bYpdkYiISF3RQ4S7P+ruf3X31939\nNXe/BPgMGBG7ttj69YOHH4b58+HcczVjQ0REskv0EJHKzDqY2UlAN+CZ2PVkg0MPhT/8AW67DX73\nu9jViIiIbJUVAyvNbAghNHQF1gKT3H1J3Kqyx+mnw8KFMHkyDB4MX/ta7IpERETAPAvayM2sE9Af\nKAKOA74DjGooSJhZMVAxatQoioqK6jxXVlZGWVlZO1Tc/qqrIZGAZ58N3Rv77hu7IhERyWbl5eWU\nl5fXOVdVVcXcuXMBSty9srWfkRUhoj4zexx4zd2/38BzxUBFRUUFxcXF7V9cRJ98EvbYMAthol6G\nEhERaVJlZSUlJSWQoRCRVWMiUnQAusQuItv07BmWxn7/fTjppNA6ISIiEkv0EGFmvzazkWa2p5kN\nMbPfAKOBe2PXlo0GDoT//V+YNQsuvDB2NSIiUsiihwhgN+AuYAnwBGGtiHHuPidqVVls3Di47jq4\n5hq4667Y1YiISKGKPjvD3c+OXUMu+tGPwoyN7343tE4cemjsikREpNBkQ0uEpMEMpkyBQw4Jm3at\nWBG7IhERKTQKETmsSxf4y1+gc2c49lhYvz52RSIiUkgUInLcbruFGRtLlsBZZ2lpbBERaT8KEXng\noIPg7rvDrI1f/Sp2NSIiUigUIvLEN78JV1wBl14aNu0SERFpawoReeTSS+G44+DUU8PMDRERkbak\nEJFHOnSAO+8M+2ocfTSsXh27IhERyWcKEXmme3d45BH4/PPQKrFpU+yKREQkXylE5KH+/cPUz2ee\ngfPO04wNERFpGwoReeqww+DWW+EPf4Df/z52NSIiko+iL3stbeess+Cll+DHP4bBg2Hs2NgViYhI\nPlFLRJ67+uoQHo4/Hl57LXY1IiKSTxQi8lynTnD//dCrV5ixUVUVuyIREckXChEFYKedwtLY774L\np5wC1dWxKxIRkXygEFEgBg0Ky2LPnAk//3nsakREJB8oRBSQ8ePDGInf/hbuuSd2NSIikus0O6PA\nTJ4cZmx85zswcCAMHx67IhERyVVqiSgwZmH9iOJimDQJVq6MXZGIiOQqhYgC1KVLWNGyY0c49tiw\nRLaIiEhLKUQUqD59woyNV16Bb39bS2OLiEjLKUQUsK98Be66C8rL4aqrYlcjIiK5RiGiwB1/PFx6\nKVx8cWiZEBERaS6FCOGKK8LYiFNOgZdfjl2NiIjkCoUIoUMHuPtu2HvvsDT2mjWxKxIRkVygECEA\n9OgBjzwCa9eGLo7Nm2NXJCIi2U4hQv5lr73C1M958+D882NXIyIi2U4hQuoYORKmTIFbbgmHiIhI\nY7TstXzB2WeHpbHPOw/22w+OOCJ2RSIiko2it0SY2UVmNt/MPjWzD8zsYTMbGLuuQnfttSE8HHcc\nvPFG7GpERCQbRQ8RwEjgd8Bw4EhgO2CWmW0ftaoC16lT2Dp8553DjI1PP41dkYiIZJvoIcLdJ7r7\nPe6+2N1fAs4A+gMlcSuTnXcOC1CtWAHf+hZUV8euSEREskn0ENGAnoADH8UuRGDw4LAs9vTpYWVL\nERGRWlkVIszMgBuAp919Uex6JJg4EX77W/jNb+C++2JXIyIi2SLbZmdMAfYHDtvWCydPnkxRUVGd\nc2VlZZSVlbVRaYXtpz8NMza+/W3Yd1845JDYFYmISFPKy8spLy+vc66qqiqjn2GeJXtAm9nNwFHA\nSHdf3sTrioGKiooKiouL260+gQ0bwoyN5cvhn/+Evn1jVyQiIi1RWVlJSUkJQIm7V7b2elnRnZEM
\nEMcARzQVICSurl3DipZmMGkSfP557IpERCSm6CHCzKYApwAnA+vMrHfy6Bq5NGnAl74U9thYuBC+\n8x3IkoYsERGJIHqIAM4BdgT+BrybcpwQsSZpQkkJ3Hkn/OlPcPXVsasREZFYog+sdPdsCDLSQiee\nGAZaXngh7L8/JBKxKxIRkfamL3BJ25VXhtUsTz4ZXnkldjUiItLeFCIkbR06wD33wJ57hjDx4Yex\nKxIRkfakECGtssMOYWnsTz+FE06AzZtjVyQiIu1FIUJa7ctfhgcfhLlzYfLk2NWIiEh7UYiQjBg9\nGm6+GX7/ezj+eHj22dgViYhIW1OIkIz53vfgtttgwQI49FAYMSJsJ75lS+zKRESkLShESEaddRYs\nWRLGSXTvDiedBHvvHdaT+OST2NWJiEgmKURIxnXoAEcdBbNnw4svwtixcMklsMce8MMfwrJlsSsU\nEZFMUIiQNjV0KNxxB7z9dtgJ9IEHYNCgMCX0ySe1bLaISC5TiJB20acP/OIXYQfQP/4R3nwTxoyB\nr3wlLKG9cWPsCkVEpKUUIqRdde0axk0sXAiPPw677w5nnhkWrLrySli1KnaFIiLSXAoREoUZHHkk\nPPpoGIj5jW/AVVdB//5w9tnw8suxKxQRkW1RiJDoBg2CKVPgnXfgiivgr3+FAw6Ar30NZsyAmprY\nFYqISEMUIiRr7Lxz2BX0zTfhvvugqgpKS8MuobfeCuvWxa5QRERSKURI1tluOygrg+eeg6efhiFD\n4Ac/gH794KKLYOXK2BWKiAgoREgWM4PDDgv7crz2GpxxRlhWe6+94JRT4PnnY1coIlLYFCIkJ3z5\ny3DddWHcxDXXwDPPwCGHwOGHw0MPQXV17ApFRAqPQoTklB13hPPPD6te/uUvYXXM446DAQPg+uvD\nOAoREWkfChGSkzp2hEmTwvbjzz8fWiR+9rMwbuLHP4Y33ohdoYhI/lOIkJxXUgL33BOW1j7vvPDz\ngAFbQ4aW1hYRaRsKEZI3+vaFX/0KVqyAW24Ji1iNHg0HHwz33gubNsWuUEQkvyhESN7p1g2+9z14\n5RWYORN23RVOPTXM6vjVr2DNmtgViojkB4UIyVsdOsDXvw6PPRaW0S4thV/+Moyb+N73YPHi2BWK\niOQ2hQgpCP/2b/A//xO6Oi6+GKZODSthTpgQQobGTYiItJxChBSUXr3gkkvgrbfgrrvg/fdDa8WQ\nISFkfP557ApFRHKHQoQUpC5d4LTToLIS/vY32Hff0MXRvz9ceim8917sCkVEsp9ChBQ0szCD4//+\nD5YuDXt2XH897LlnCBkVFerqEBFpTFaECDMbaWZTzWylmdWY2dGxa5LCM2AA3HRTWFr7N7+Bv/89\nTA/dZ5+wgNWcObB5c+wqRUSyR1aECKA78CJwLqB/90lUPXvCT38Kr78epoh+/ethE7CxY8OYipNP\nhvvvh08+iV2piEhcWREi3P2v7n6Zuz8CWOx6RAA6dQoBYsqUMKujoiK0SCxZEro9evWCI48MrRdv\nvhm7WhGR9pcVIUIk25lBcTFccUUYjPn223DjjWEPj//4D9h7bzjwwDDzY/58qKmJXbGISNtTiBBJ\nQ//+cO65YY2JNWvggQdg6NDQajF8OOyxR5jt8eijmjYqIvlLIUKklXbcEY4/Pmz8tWpVmDJaVgaz\nZ0MiEZbdnjQJ7rgjPC8iki/Ms2z+mpnVAMe6+9RGni8GKkaNGkVRUVGd58rKyigrK2uHKkW2zT2M\nn5g6NRxhUNCTAAATsUlEQVTPPBPOH3ooHH10OPbbL3SViIhkWnl5OeXl5XXOVVVVMXfuXIASd69s\n7WfkbIioqKiguLi4fYsTaYVVq0L3xtSpMGsWrF8fppXWBorDDguDOUVE2kplZSUlJSWQoRCRFd0Z\nZtbdzIaa2UHJU3snH/eLWphIBu22G5x5Jjz8cBhHMX06jBkD
5eXw1a+G5089NUwnXbs2drUiItuW\nFSECOBh4AaggrBNxLVAJ/CJmUSJtZfvtw66i//3fYXGr+fPhBz+AhQvD+Ipdd607vVREJBtlRYhw\n97+7ewd371jvOCt2bSJtrUMHOOSQsE35ggVhzYlrroEtW+D888NMkNTppVnWAykiBSwrQoSIbLXX\nXnDeefDEE7B6deju2G8/uOEGKCnZOr30r3+FjRtjVysihUwhQiSL9ewJJ50E990XAsXs2fDNb4YA\nMWFC6PY47ji4++4wzkJEpD0pRIjkiO22CwMxb7gh7Ovx0ktw0UVhTMXpp0Pv3mFH0muvhWXLYlcr\nIoVAIUIkB5nBkCHw85/Ds8/Ce++FQZpFRWHp7YEDYfBguOACmDcPqqtjVywi+UghQiQP9OkDZ58d\n1qD48EN45JGw7sSdd8Lhh4fnzzwzrKpZWRnWqBARaS0tbSOSZ7p127qAVU1NmD46dWoIFnfeGV5j\nFgZw7r9/3WPwYNhhh5jVi0guUYgQyWMdOsCIEeH49a/h00/DUtyLFm09Hnyw7lbm/fptDRSpAWOn\nneLdh4hkJ4UIkQKy444wbFg4Uq1bB6++WjdcPPoo3HTT1m3N+/T5YsvF/vtDr17tfx8ikh0UIkSE\n7t3Dglb1t6PZsAGWLoXFi7eGizlz4NZbw2JYEKaZNhQu+vTR5mIi+U4hQkQa1bUrHHhgOFJt3gyv\nvVa35eIf/4Dbb4dNm8Jrevb8YpfI/vuH7hKFC5H8oBAhIi223XYhIAweHBa/qrVlSxhfURssFi+G\nF18Mq27Wzgjp0aPhcLHXXmEMh4jkDoUIEcmYTp1g333DccwxW8/X1MDy5XVbLhYtgr/8ZeuOpdtv\nH5b3rj9bZJ99tEW6SLbSH00RaXMdOoSWhr32gokTt553h5Ur67ZcLFoEM2bAxx+H13TuHBbPqt9y\nse++4TkRiUchQkSiMYM99gjHuHFbz7vDqlVfbLn429/CeYCOHeFLXwpH3751f039uVev8FoRyTyF\nCBHJOmZhL5DeveGII+o+t2ZNaLFYvDjsG/Luu2HZ7+eeCz+vWlV3u/SOHcN1thU2dttN3SYiLaU/\nMiKSU3bdFUaODEdDtmyBDz4IweK997aGjNpfKypg2rTwmto1MCB0uey2W+Mho/bX3r3DwFIRUYgQ\nkTzTqRPsvns4mlJdHVotGgsbCxbAzJnw/vt1NzAzC10k2+pK6dNHYzYk/ylEiEhBSh1T0ZSaGli9\nuvGw8cor8PjjIWxs3lz3vbvu2ryw0bVr292nSFtSiBARaUKHDlvHZxx0UOOvq6kJO6g2FjZefRWe\nfDL8XLsgV62ddto6CHTXXbce9R/XnuvWrW3vWaS5FCJERDKgQ4fwBd+r1xdX+EzlDh999MWw8d57\nYdDomjVhwa7anxvatn377ZsXNmp/3mUXjeOQtqEQISLSjszCl/ouu8CQIdt+/fr1WwNF7bF6dd3H\n774bxnDUPk4dw1GrqGjbYSP1XFGRVhCVbVOIEBHJYt26Qf/+4WgOd6iqajhspJ579VWYNy/8XLuw\nV6qOHUPQaUmLR7du2hel0ChEiIjkEbOw+VnPnjBgQPPes3lz6GJpqrVjzRp4662mu1m6dt0aLHba\nCXbYIRw77tiyn7U4WO5QiBARKXDbbbd18GhzrV8fBpI21trx8cdhX5S334ZPPw0/r10bft64selr\nd+uWXvho6GeNBWlbChEiItJi3bqFo1+/lr938+atgSI1XGzr55Urv3i+oRaRVF26ZCaMdO0arrXd\nduqySaUQISIi7Wq77WDnncPRWlu2wGefNRw6mgokq1fDG2/UPf/ZZ3WXTG9Mly7h6Nx568+pRybO\nt/QasQbBKkSIiEjO6tRp6xiQ1qqpgXXrGg4dn38e1vfYuPGLR1Pn168PXTvNeX1Ds2qaq2PH5oWO\nDRta//uUSiFCRESE8K/5
2m6MGKqr0wsqLTn/wQcZLtrds+IAfgC8CXwOPAsc0sjrigGvqKjwfHHf\nfffFLiGj8ul+8ule3HU/2Syf7sVd95OtKioqHHCg2DPw3Z0VS4mY2YnAtcDlwFeABcBjZrZr1MLa\nSXl5eewSMiqf7ief7gV0P9ksn+4FdD+FIitCBDAZ+G93v9vdlwDnAOuBs+KWJSIiIo2JHiLMbDug\nBJhde87dHXgCODRWXSIiItK06CEC2BXoCNQf7vEB0Kf9yxEREZHmyMXZGV0BFi9eHLuOjKmqqqKy\nsjJ2GRmTT/eTT/cCup9slk/3ArqfbJXy3dk1E9czb87KGm0o2Z2xHvimu09NOX8nUOTuk+q9/mTg\nT+1apIiISH45xd3va+1FordEuPtmM6sAxgJTAczMko9vauAtjwGnAG8BGV42Q0REJK91BfYifJe2\nWvSWCAAzOwG4kzArYz5htsZxwH7uvjpiaSIiItKI6C0RAO7+QHJNiCuB3sCLwHgFCBERkeyVFS0R\nIiIiknuyYYqniIiI5CCFCBEREUlLzoQIMxtpZlPNbKWZ1ZjZ0bFrag0zu8jM5pvZp2b2gZk9bGYD\nY9eVDjM7x8wWmFlV8viHmX09dl2ZYmYXJv+fuy52Lekws8uT9acei2LXlS4z62tm95jZGjNbn/x/\nrzh2Xekwszcb+G9TY2a/i11bOsysg5n90szeSP63ec3MLoldV7rMrIeZ3WBmbyXv52kzOzh2Xc3R\nnO9MM7vSzN5N3tvjZjagpZ+TMyEC6E4YcHkuYQeyXDcS+B0wHDgS2A6YZWbbR60qPSuACwg7rJYA\nc4BHzGxw1KoywMwOAb5L2BQul71MGLTcJ3kcHrec9JhZT2AesBEYDwwGfgp8HLOuVjiYrf9N+gBf\nI/z99kDMolrhQuB7hL+n9wN+BvzMzH4Ytar03UZYbuAUYAjwOPCEmX0palXN0+R3ppldAPyQ8Pfb\nMGAdYePLzi35kJwcWGlmNcCxqYtT5brk7JRVwCh3fzp2Pa1lZh8C/+Hud8SuJV1m1gOoAL4PXAq8\n4O4/iVtVy5nZ5cAx7p6T/1pPZWZXAYe6++jYtbQFM7sBmOjuudoqOQ14392/k3LuQWC9u58Wr7KW\nM7OuwFrgKHf/a8r554EZ7n5ZtOJaqKHvTDN7F7ja3a9PPt6RsN3E6e7e7BCbSy0R+a4nIS1+FLuQ\n1kg2Z54EdAOeiV1PK/0emObuc2IXkgH7Jps1Xzeze82sX+yC0nQU8LyZPZDsBqw0s7NjF5UJydV7\nTyH86zdX/QMYa2b7ApjZUOAwYEbUqtLTibCv08Z65z8nR1vyapnZlwktX6kbX34KPEcLN77MinUi\nCl1yhc4bgKfdPSf7qs1sCCE01Kb3Sclt3XNSMggdRGhuznXPAmcArwJfAq4A5prZEHdfF7GudOxN\naBm6FvgVoRn2JjPb6O73RK2s9SYBRcBdsQtphauAHYElZlZN+Ifqxe5+f9yyWs7dPzOzZ4BLzWwJ\n4V/pJxO+ZJdFLa71+hD+0drqjS8VIrLDFGB/QmLPVUuAoYS/BI8D7jazUbkYJMxsD0KoO9LdN8eu\np7XcPXV525fNbD7wNnACkGvdTR2A+e5+afLxgmSAPQfI9RBxFjDT3d+PXUgrnEj4oj0JWEQI4jea\n2bs5GvK+BdwOrAS2AJXAfYSxX4K6M6Izs5uBicBX3f292PWky923uPsb7v6Cu19MGIh4fuy60lQC\n9AIqzWyzmW0GRgPnm9mmZMtRznL3KmAp0OKR2FngPaD+Fr6Lgf4RaskYM+tPGGD9P7FraaXfAle5\n+5/d/RV3/xNwPXBR5LrS4u5vuvsRhEGK/dx9BNAZeCNuZa32PmCEwdapeiefazaFiIiSAeIY4Ah3\nXx67ngzrAHSJXUSangAOIPwramjyeB64FxjquTgaOUVywOgAwhdyrpkHDKp3bhChZSWXnU
VoSs7F\nsQOpugHV9c7VkOPfNe7+ubt/YGY7EWYF/V/smlrD3d8khIWxteeSAyuHE8a1NFvOdGeYWXfCX3y1\n/wrcOzlo5yN3XxGvsvSY2RSgDDgaWGdmtYmwyt1zandSM/s1MBNYDuxAGBw2GhgXs650JccJ1Bmb\nYmbrgA/dvf6/grOemV0NTCN80e4O/ALYDJTHrCtN1wPzzOwiwjTI4cDZwHeafFcWS7ZsnQHc6e41\nkctprWnAJWb2DvAKYdr3ZOCPUatKk5mNI3znvArsS2hpWUTYMDKrNeM78wbCf6vXCLti/xJ4B3ik\nRR/k7jlxEL6UaggpN/W4PXZtad5PQ/dSDZwWu7Y07uWPhOa9zwnpdhYwJnZdGb7HOcB1setIs/by\n5F8OnxOC3n3Al2PX1Yr7mQgsBNYTvqjOil1TK+/na8k/+wNi15KBe+kOXAe8SVh3YBkhtHaKXVua\n93M88Fryz85K4EZgh9h1NbP2bX5nEgZZv5v8s/RYOv8P5uQ6ESIiIhJfTvdTiYiISDwKESIiIpIW\nhQgRERFJi0KEiIiIpEUhQkRERNKiECEiIiJpUYgQERGRtChEiIiISFoUIkRERCQtChEiecbM9jSz\nGjM7MHYttcxskJk9Y2afm1llGu/PunsSEYUIkYwzszuTX3g/q3f+GDNrrw2Wsm09+18AnxE2MRq7\njdc2JmP3ZGanm9nHmbqeSKFSiBDJPCds2HOBmRU18Fx7sG2/pIUXNNuuFW/fB3ja3d9x93S/vDN5\nT0YG/luYWYfkLpwiBUkhQqRtPEHY0fTnjb3AzC43sxfqnTvfzN5MeXyHmT1sZheZ2ftm9rGZXWJm\nHc3st2b2oZmtMLMzGviIwWY2L9mF8JKZjar3WUPMbIaZrU1e+24z2yXl+SfN7Hdmdr2ZrQb+2sh9\nmJldlqxjg5m9YGbjU56vIWwJfbmZVZvZZU1c52dmtix5nbeSW3439Noz6rck1G/pMbMDzWyOmX1q\nZlVm9k8zKzaz0cDtQFGyxehfNZlZZzO7xszeMbPPkl0wo1OueXryv8FRZvYKsAHoZ2ZfNbPnku/5\n2MyeMrN+DdUukk8UIkTaRjUhQJxnZn2beF1D/xquf24M8CVgJDAZuBKYDnwEDANuBf67gc/5LXA1\ncBDwDDDNzHYCSLaQzAYqCF/w44HdgAfqXeM0YCPw78A5jdzDj5N1/QQ4gLCl8FQz2yf5fB9gEXBN\n8j6uaeQ6VwE/I3R9DAZOJASxhjjb/r37E7ACKCHc41XAZmBesuZPgd71avo9MBw4IXkvfwZmptwL\nQLdknd8G/g34GHgYeBIYAowA/tBIfSL5Jfae5zp05NsB3AH8JfnzP4D/Sf58DFCd8rrLgcp67z0f\neKPetd6o95rFwN9SHncA1gInJB/vCdQA/5Hymo7A8tpzwMXAzHrX3SP5vgHJx08Czzfjft8BLqh3\n7jngdymPXwAua+IaPQhdQGc28nztPR2YfHw68FG919T//a0CTm3keg29vx8hZPSpd/5x4P+lvK8a\nGJLy/E7JcyNj/7+nQ0d7H51akDdEpOUuAGabWWP/+m6OV+o9/gB4qfaBu9eY2YeEloRUz6a8ptrM\nnif8Cx9gKDDGzNbWe48Txi+8lnxc0VRhZrYD0JcQllLNA1oyk2Iw0BmY04L3bMt1wG1mdhqhe+nP\n7v5GE68/gBC2ltYb59AZWJPyeJO7v1z7wN0/NrO7gFlm9njysx5w98ZaUUTyhrozRNqQuz9FaN6/\nqoGna/jiYMGGBi9urn/ZRs615M9zD2Aq4Yt+aMqxLzA35XXrWnDN1vi8ha/f5u+du/8C2J/Q9TMG\nWGRmxzRxzR7AFkLXR+rvyWBCC1Gjtbr7WYRujHmEbphXzWxYC+5HJCcpRIi0vYuAo4BD651fTRgv\nkOorGfzcEbU/mFlHwtiARclTlYT+/Lfd/Y16R7O/0N
19LfAucFi9pw5L+azmWEYYpNjc6Z+rgR3M\nbPuUc1/4vXP319z9RncfD/wFODP51CZCq0OqF5Lnejfwe7JqWwW5+wJ3/y93P4zQenRyM+9FJGcp\nRIi0sWTT95+AH9V76m9Ar+SMhL3N7AfA1zP40T8ws2PNbBAwBehJGGMBYQDhzsD9ZnZw8vPHm9nt\naUxZvJownfUEMxtoZlcR/gV/Y3Mv4O4bgf8CfmtmpybrGW5mZzXylueA9cBvkq89mTBeAQAz65qc\nWTLazPqb2WHAIWwNNm8BPcxsjJntYmbbu/sy4D7gbjObZGZ7mdkwM7vQzCY0Vnvydb82sxHJzxpH\naNFpSYgSyUkKESLt4zLCn7d/jdh39yXAucnjReBgwhfytjRnRocDFyaPFwmzK45y94+Sn/0eobWg\nA6G7ZSFhDMHH7u6NXLMxNyXfe03yOuOSn/X6NmquW7D7lcC1hNkZi4D7gV4NXcPDWhPfAiYkP/NE\nwkDVWtXALsBdwKvJaz0KXJF8/zOEWS3/C6wC/jP5vjOAu5P3soTQenEwYVBqY9YD+wEPJj/rVsKg\n0j9s655Fcp1t/ftCREREpPnUEiEiIiJpUYgQERGRtChEiIiISFoUIkRERCQtChEiIiKSFoUIERER\nSYtChIiIiKRFIUJERETSohAhIiIiaVGIEBERkbQoRIiIiEha/j/BWPff1IlBNQAAAABJRU5ErkJg\ngg==\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "\n", + "%matplotlib inline\n", + "\n", + "temp_df=df[['Program_Exp','Liabilities_To_Asset','Working_Capital','Surplus_Margin','Total_Expenses']]\n", + "\n", + "\n", + "def evaluate_clusters(metrics_df,max_clusters):\n", + " error = np.zeros(max_clusters+1)\n", + " error[0] = 0;\n", + " for k in range(1,max_clusters+1):\n", + " kmeans = KMeans(init='k-means++', n_clusters=k, n_init=10)\n", + " kmeans.fit(metrics_df)\n", + " error[k] = kmeans.inertia_\n", + "\n", + " plt.plot(range(1,len(error)),error[1:])\n", + " plt.xlabel('Number of clusters')\n", + " plt.ylabel('Error')\n", + "\n", + "\n", + "evaluate_clusters(temp_df,10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Through the graph we determined number of clusters to be 5" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false, + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "----Initialization complete-------\n", + "data1 = 38240\n", + "data2 = 19794\n", + "data3 = 23592\n", + "data4 = 20410\n", + "data5 = 6126\n", + "data1 length after 
1st merge = 58034\n", + "data1 length after (subtract 19120, have spaces after scraping): 81626\n", + "normalized dataframe\n", + " 0 1 2 3 4\n", + "0 2.131339e-06 1.977141e-07 3.391377e-06 -1.694160e-07 1.000000\n", + "1 3.518269e-05 0.000000e+00 1.425105e-04 -1.555219e-06 1.000000\n", + "2 2.448563e-06 1.278658e-07 1.702455e-06 2.302254e-07 1.000000\n", + "3 7.552761e-06 1.598110e-08 5.969663e-04 1.243570e-05 1.000000\n", + "4 2.206591e-05 1.048490e-06 3.853569e-05 3.765625e-06 1.000000\n", + "5 1.247935e-06 1.211890e-06 -2.897675e-07 -6.435478e-09 1.000000\n", + "6 3.457386e-06 8.545792e-07 1.777695e-06 2.701533e-07 1.000000\n", + "7 4.520587e-07 1.642210e-07 1.010448e-05 3.478109e-07 1.000000\n", + "8 1.832367e-06 3.001670e-07 8.991878e-07 -3.633486e-08 1.000000\n", + "9 7.470870e-06 0.000000e+00 8.500239e-05 1.197957e-06 1.000000\n", + "10 2.005343e-05 0.000000e+00 4.429372e-04 6.795234e-06 1.000000\n", + "11 1.642175e-06 0.000000e+00 2.079079e-06 1.124058e-07 1.000000\n", + "12 3.046760e-06 1.193733e-08 7.965182e-07 -9.820306e-08 1.000000\n", + "13 4.744364e-06 4.632382e-06 3.273665e-07 6.344877e-07 1.000000\n", + "14 3.280780e-06 2.014551e-06 0.000000e+00 -2.345903e-07 1.000000\n", + "15 0.000000e+00 0.000000e+00 4.723240e-07 -7.328753e-08 1.000000\n", + "16 3.821023e-05 0.000000e+00 0.000000e+00 4.110799e-06 1.000000\n", + "17 8.731085e-07 0.000000e+00 0.000000e+00 -7.170261e-06 1.000000\n", + "18 2.305827e-07 1.792308e-08 1.377174e-07 -1.647954e-08 1.000000\n", + "19 0.000000e+00 7.405453e-07 7.969953e-07 3.496711e-08 1.000000\n", + "20 5.332096e-06 2.762217e-07 2.232079e-06 8.690540e-07 1.000000\n", + "21 0.000000e+00 3.165589e-08 7.110422e-08 -1.760049e-08 1.000000\n", + "22 5.863323e-07 1.213551e-08 9.222656e-07 -1.780591e-08 1.000000\n", + "23 6.075890e-07 4.660744e-08 2.181732e-06 1.247884e-07 1.000000\n", + "24 7.654044e-07 0.000000e+00 6.994364e-07 7.692317e-08 1.000000\n", + "25 6.029556e-07 3.681503e-08 1.304060e-04 -1.261360e-07 1.000000\n", + 
"26 0.000000e+00 3.282049e-08 3.332658e-08 -1.507132e-08 1.000000\n", + "27 2.860444e-08 1.018467e-08 5.368263e-09 1.236626e-09 1.000000\n", + "28 2.802819e-06 8.261259e-08 2.910608e-06 -6.247890e-08 1.000000\n", + "29 1.264341e-06 1.023200e-07 1.673237e-06 3.164497e-07 1.000000\n", + "... ... ... ... ... ...\n", + "75744 9.398315e-06 0.000000e+00 9.509536e-06 0.000000e+00 1.000000\n", + "75745 0.000000e+00 3.200816e-10 6.816613e-07 -1.922601e-08 1.000000\n", + "75746 0.000000e+00 1.919854e-08 8.767459e-10 -1.607975e-09 1.000000\n", + "75747 5.699027e-07 3.801801e-08 3.802091e-07 4.646013e-08 1.000000\n", + "75748 0.000000e+00 6.340360e-07 7.657794e-07 -3.099764e-08 1.000000\n", + "75749 0.000000e+00 0.000000e+00 4.779656e-05 3.354993e-05 1.000000\n", + "75750 0.000000e+00 2.050710e-06 1.006396e-04 -2.591285e-06 1.000000\n", + "75751 0.000000e+00 1.776056e-08 3.154694e-06 8.715013e-07 1.000000\n", + "75752 2.012123e-07 6.863091e-08 2.711069e-07 -1.230454e-08 1.000000\n", + "75753 0.000000e+00 5.640484e-07 8.674260e-05 -9.016582e-06 1.000000\n", + "75754 3.429249e-06 1.927893e-06 5.185698e-07 -1.618661e-07 1.000000\n", + "75755 0.000000e+00 4.808916e-08 2.906421e-05 6.753696e-07 1.000000\n", + "75756 5.519192e-07 2.110600e-09 9.192742e-07 2.186902e-07 1.000000\n", + "75757 6.568868e-06 1.942418e-07 5.048166e-05 6.165004e-06 1.000000\n", + "75758 4.783169e-08 7.326701e-08 -1.605745e-08 -4.076068e-09 1.000000\n", + "75759 5.249886e-06 2.195538e-06 9.035736e-01 5.195984e-04 0.428433\n", + "75760 2.003230e-06 3.103842e-06 -3.819145e-06 2.964244e-08 1.000000\n", + "75761 3.533750e-06 3.740158e-08 1.980132e-05 1.109597e-06 1.000000\n", + "75762 1.152416e-06 1.264933e-06 4.999374e-07 -3.510865e-08 1.000000\n", + "75763 3.031673e-08 6.471599e-09 2.858051e-08 -4.052156e-09 1.000000\n", + "75764 1.666455e-05 3.514561e-06 7.653386e-05 -6.254289e-06 1.000000\n", + "75765 7.679648e-07 9.494667e-08 5.871411e-06 1.967259e-08 1.000000\n", + "75766 2.658072e-08 3.010748e-08 
0.000000e+00 0.000000e+00 1.000000\n", + "75767 4.956654e-06 8.137010e-07 6.693813e-06 3.507152e-07 1.000000\n", + "75768 3.552866e-06 1.942938e-09 2.345908e-04 1.211465e-06 1.000000\n", + "75769 1.300652e-06 3.203959e-07 1.435191e-06 -3.396852e-08 1.000000\n", + "75770 1.389913e-07 7.084098e-08 9.115801e-08 4.874215e-09 1.000000\n", + "75771 3.667711e-06 4.065639e-08 6.724633e-06 1.832371e-07 1.000000\n", + "75772 3.156181e-08 1.337271e-08 3.266378e-08 1.162375e-08 1.000000\n", + "75773 5.513871e-07 2.581028e-07 5.323596e-06 3.940635e-08 1.000000\n", + "\n", + "[75774 rows x 5 columns]\n", + "compute kmeans clusters\n", + "-------------------------------------\n", + "silhoutte coefficent : 0.997794498966\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiwAAAGHCAYAAACTRAlZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3Xl8XXWd//HXJ0nbpKUrS9PNbmmBEW1JCkVQQDpQoI6D\nyqhFBQVXFJii4sgwI2ptHWYEWUQdQRaRCuMy4w+hRUAEnYFKUgpo6V7oCoWu0qQ0yef3xzmnubnN\nzV1yl3OT95PHfST33O/3nO89vSSffL6buTsiIiIicVZR6gaIiIiIpKOARURERGJPAYuIiIjEngIW\nERERiT0FLCIiIhJ7ClhEREQk9hSwiIiISOwpYBEREZHYU8AiIiIisaeARUQkgZndY2arS92OXJjZ\nZDNrN7MLSt0WkXxTwCKSB2Z2UfiLoj7p+BAzW2pm+8zsrFK1T7Li4aMgzOzzZvaxQp2fArZdpJSq\nSt0AkV6k0y8KMxsM/BY4DjjP3R8uSaskbr4AbAR+ku8Tu/taM6tx9zfzfW6RUlOGRaQAzOww4GHg\n7cD7FawcKqH74uQs680K640uVNvKjZlVmVkVgIIV6a0UsIjkmZkNApYA0wmClcUZ1vuDmTWZ2TQz\ne8LM3jCzVWb2vvD1dyd0L/3FzN7dxTnGmNmdZrbNzFrM7HkzuyipzAAz+6aZNZrZLjP7q5k9bmbv\nSioXBRSXm9lnzGytmTWb2VNmdnxS2VFmdpeZbQqvu8XMfmVmY9O87Vy7L3Lu9jCzOWb2ezPbY2a7\nw/fzwW7KRwHSyUnHDxkvku4+mNlGYCrwt2HddjN7OKH+MDO7ycxeDuuvMrMvpbjuFWZ2pZmtBZqB\nqSnadI+Z7TSzsWb2azPba2avmtm3u3ivh5vZT8P7ssPMbjez4zUuRuJAXUIi+XUYsBhoAD7g7g9l\nUdeBI4BfA/cCPwM+D/wsHPPwXeB7wD3AVcB/mdk4d28GMLNaYCnwJnAT8DpwLnCHmQ1y91vD6wwD\nPg4sAn4IDAE+CTxsZjPc/c9J7boIGAjcChjwFeAXZlbn7u1hmf8G6sLrvgyMBM4CxgKbunnPlsX9\n6TEz+yTwn8ByYAGwCzgemA3c303VTAOkdPfhCwT38XVgIcH73xq2bSDwJHAU8IOw/DuB68zsKHe/\nKulanwL6hWXfDN9LTYq2VxFk/J4Evhi26ctmttrdbw+vXwE8RBBofw9YDZwH/DiL9y9SOO6
uhx56\n9PBB8Eu9HVgPtADvyeEcTwJtBFmZ6Nix4XkPAMcnHD8nPH5BwrE7CX5JDk067/3Aa0C/8HkFUJVU\nZijwKvD9hGOTw2tsAw5LOP6+sJ1nhc8PD8tdnuX7jc5/cpb1ZoXXH51lvWHAXuCJ6F6kKPcTYFUX\n1zs5qVzU/guyuQ/ACuDhLo5fC+wGJiQdvw7YD9QmXfd1YFh3bUp4P23AVUllnwX+N+H5B8O6n00q\n97uw/gXdvS899Cj0Q11CIvl1FEHA0l1WoTu73P2X0RN3XwH8FXje3ZcllHs6/DoJwMyMIJD4H6Aq\nTO0fbmaHE/xlPZzgL2fcvd3dW6N6Zjac4C/1Z4BOs5xC97r7XxOeP0mQGZgUPn+DIKB6t5kNTfXG\nzGxQUrtGhC8NTTxuwWDlxHpDkupF1xiRVG9QqmuHZhNkiha6+4E0ZXOR0X3oxvnA48DepPf7CMG/\nz7uSyt/v7ruyOP9/Jj3/Ax3/hhDcnxaCjEqiKLMmUlLqEhLJHwc+A9wALDGzd7p7p/U8zKyajl+4\nAO7uryY87yrQ2U0wqyT5GASBCEAtMBi4lKAbqau2HZXQjk8AVwJH0/nnwKou6iZfe2fitd29xcyu\nBr4NvGpm/wc8ANyd9N5+AHyki/P/Jun5IwRdFomvn9JFvecSvnfgduDTXZSLTA6/Jnd55UUW9yGV\nKQQZte1dnZ6Ef7/Qhiya99cugpuddHx+AMYDm/3QQbtrsriOSMEoYBHJr78QdNc8BvzWzE5x980J\nr38E+FHC81agf8LzthTnTXU8+ss3ypbeRTDGpSvLAczs4wS/3H9OMI5ie3j+fwHG5HBt3P07ZvYr\ngjEPs4H5wFfN7DR3fyEstgC4I6H+aOBu4Ao6BxGvJ13nCoLunEg98G/Ah5PKJt7nfEo1fqPykIKZ\n3YdUjGD803dSvL4y6XlzmvMlSvtvKBJ3ClhE8szdnzGz8wgyA781s3e5e/SL9TfA3yYUbz/kBLnZ\nRtAlUeHuj6Up+wFgpbt3mhljZgt60gB3XwdcD1xvZlMIAqQrgYvD11cQjN+IrhdlPBrd/X+7OW9T\nUjujX7J/dPctWTRxLcEv6OMIxvpkamdYb1jS8QldFU53H0gdAK0DBmXw71coLwEnm1n/pCzLlBK1\nR6QTjWERKYDwl85cgh/2iy1YlwV33+bujyU8Hs/T9dqAXwEfNLNjk183syMSnh7y17aZnQKckMu1\nzazGzAYkHV5HMPYm+XgpLSEI6q42s/7pCifYQBBYnpp0/FISgo8s7sMbHBr8QDA4+l1mdkbyC+F0\n50MyOnm2BKgGLkm4rpH0PkVKRRkWkfzplF539/82s08RDGL8f2Z2trvvL+D1ryL4pbrUzH5EkM0Y\nAcwgGLBZG5Z7AHivmf2SYBrrZIKxN38htwDjbwiCsvvDc7QRDCA9nGDqdCFk3ZXh7rvM7IvA9wnu\n0c8IpgJPI5g19MkU9XaG9+rKcOrvBuDvCN5fokzvQyNwSTjeZS2wzd1/T9DN9XfAQ2Z2B7CMYJr8\n24H3E3TX7cn2fWfh52HbbjSzownGM51HMDYKFLRIiSlgEcmfQ36gu/udZjYC+HfgfjN7n3esXZLR\nOUi9t02n4+6+zcxOAL5G8AuulmCMxwsEa6dE5W4zs6MI1vGYTfDL9cPAR4ETc7j2SwRrxswCPkYw\nLmcFwTo0D3TzXqPz5CKneu7+n2a2leB+XEMwq2cFh44bST7/pQQZ6c8RzKRZBHyZcFxQKNP7cC3B\nuixfIQhIHgV+7+5vmNk7gX8mCHQuIhhcvSpsa+JMre72O0r179VtWXdvN7NzgBuBT4Tt/xXwTeD3\n4fsWKRlzV9AsIiJdM7PzgfuAk9z9T6Vuj/RdZTeGxczeFS4vvTlcLvq9acqfZh1LYEePtvAvTBER\nCYXT7hOfVxCszruLYKE5kZIpxy6hQQT/49wO/DJN2Yg
T7N+x9+CBzNZFEBHpS261YBPFpwkG4J5P\n0E345QIttieSsbILWDzYSG4xdJremInt7l7IAWsiIuXuUWAeweDfaoL9hC519x+WtFUilGHAkiMD\nng3TnS8A13a37oOISF/k7j8Fflrqdoh0pezGsORgK8GUzQ8QzJzYCDxuZtNL2ioRERHJWFnPEjKz\nduA8d/91lvUeB15y94tSvH44wXTPDWgqn4iISDaqCVaCXpKwyneP9ZUuoWRL6XoztchslBYVERHp\niY8A9+brZH01YJlO0FWUygaAe+65h2OPPWSVcymQefPmccMNN5S6GX2K7nnx6Z4Xn+55ca1YsYKP\nfvSjkN2O4mmVXcBiZoOAOjqW5p5kZtOAHe6+0cwWAqOj7h4zuwJYT7AbbDXB6p7vBs7s5jItAMce\neyz19fWFeSNyiKFDh+p+F5nuefHpnhef7nnJ5HVIRdkFLAT7ovyOjqWpoyW17yLYDbUWGJdQvn9Y\nZjSwD3gOmOXuTxSrwSIiItIzZRewhJuEpZzd5O6fSHr+7wT7uIiIiEiZ6gvTmkVERKTMKWCR2Jg7\nd26pm9Dn6J4Xn+558eme9w5lvQ5LoZhZPdDY2NiogVoiIiJZaGpqoqGhAaDB3ZvydV5lWERERCT2\nFLCIiIhI7ClgERERkdhTwCIiIiKxp4BFREREYk8Bi4iIiMSeAhYRERGJPQUsIiIiEnsKWERERCT2\nFLCIiIhI7ClgERERkdhTwCIiIiKxp4BFREREYk8Bi4iIiMSeAhYRERGJPQUsIiIiEnsKWERERCT2\nFLCIiMTF3/xNqVsgEltVpW6AiIgAZh1f3UvbFpEYUoZFRKTUXnml++eRtWsL3xaRmFLAIiJSarW1\n3T8H+Na3oK4OFi4sTptEYkYBi4hIKaXKpiQeb28PAhaA+fOD5yJ9jAIWEZFS6iqbknx8/nxobg6+\n37cPFiwofLtEYkYBi4hIqaTKriS+3t4O3/525+MLFyrLIn2OAhYRkVJJlV1JfD0xuxJRlkX6IAUs\nIiKlkC67Epk/v+vjyrJIH6OARUSkFNJlVyIHDnR9XFkW6WMUsIiIFFum2ZV0lGWRPkQBi4hIsWWa\nXUlHWRbpQxSwiIgUU76yKxFlWaSPUMAiIlJMRx+d3/MpyyJ9hAIWEZFi2rULKivze05lWaQPUMAi\nIlJsra3BjsxdPf70p+zPt28fNDXlv50iMVJV6gaIiEiCGTNg8WLYti3zOrW1QT2RXkwBi4hI3Mye\nXeoWiMSOuoREREQk9hSwiIiISOwpYBEREZHYU8AiIiIisaeARURECmfHjlK3QHoJBSwiIlIYS5bA\nUUfBww+XuiXSCyhgERGR/HOHq6+Gtrbgq3upWyRlTgGLiIjk3+LFHavvNjYG2RaRHlDAIiIi+eUO\n11zTsWdSZWXwXFkW6QEFLCIikl9RdqWtLXje1qYsi/SYAhYREcmf5OxKRFkW6SEFLCIikj/J2ZWI\nsizSQwpYREQkP1JlVyLKskgPlF3AYmbvMrNfm9lmM2s3s/dmUOd0M2s0sxYzW2VmFxWjrSIifUqq\n7EpEWRbpgbILWIBBwLPApUDaMN3MJgAPAI8C04AbgdvM7MzCNVFEpI9Jl12JKMsiOaoqdQOy5e6L\ngcUAZmYZVPkcsM7drwqfrzSzdwLzgN8WppUiIn1M4ror3UnMspx9duHbJb1GOWZYsnUS8EjSsSXA\nO0rQFhGR3ifKrlRk+CulokJZFslaXwhYaoFXko69AgwxswElaI+ISO+yZUuQXWlvz6x8e3uQZdmy\npbDtkl6l7LqEREQkZsaMgZUrYffuzOsMHRrUE8lQXwhYtgEjk46NBPa4+/7uKs6bN4+hQ4d2OjZ3\n7lzmzp2b3xaKiJS7qVNL3QIpgUWLFrFo0aJOx3ZnE7hmwbyM+xDNrB04z91/3U2ZbwPnuPu0hGP3\nAsPc/dwUdeqBxsb
GRurr6/PdbBERkV6rqamJhoYGgAZ3z2AkdmbKbgyLmQ0ys2lmNj08NCl8Pi58\nfaGZ3ZVQ5QdhmX8zs6PN7FLgfOD6IjddREREclR2AQswA1gGNBKsw/IdoAn4evh6LTAuKuzuG4A5\nwN8SrN8yD7jE3ZNnDomIiEhMld0YFnf/Pd0EWu7+iS6OPQE0FLJdIiIiUjjlmGERERGRPkYBi4iI\niMSeAhYRERGJPQUsIiIiEnsKWERERCT2FLCIiIhI7ClgERERkdhTwCIiIiKxp4BFREREYk8Bi4iI\niMSeAhYRERGJPQUsIiIiEnsKWERERCT2FLCIiIhI7ClgERERkdhTwCIiIiKxp4BFREREYk8Bi4iI\niMSeAhYRERGJvapSN0BS+/ivPs66XesyLj9p2CTufN+dhWuQiIhIiShgialrH7+Wu567K6s6T778\nJBOGT+Da068tTKNEMrQDGFHqRohIr6IuoZi6+uSri1pPJF+WAEcBD5e6ISLSqyhgian+/fszYeiE\nrOpMGjaJ/v37F6ZBIhlw4GqgLfzqpW2OSObWri11CyQNBSwxtvLSlVmVX/G5FQVqiUhmFgNN4feN\nBNkWkdj71regrg4WLix1S6QbClhiLJssi7Ir0lNnAj35G9OBa4DK8Hll+FxZFom19vYgYAGYPz94\nLrGkgCXmMs2yKLsiPWHAI0Ad8HmCwGVHwtdMRNmVtvB5G8qySBmYPx+am4Pv9+2Da64pbXskJQUs\nMZdJlkXZFemJnUnPbyUIXA5P+Pptug9ckrMrEWVZJNba2+Hb3+58bOFCWLy4NO2RbilgKQPpsizK\nrkhPZDL9+KsEgcvPCbIu0SOSnF2JKMsisZaYXUl0ySXgCrPjRgFLGeguy6LsivREcnYlnX8gyLpE\nj4UE2ZOvcmh2JaIsi8RSV9mVyJYt8NBDxW2PpKWApUykyrIouyI90dPF3b4OfBNYzqHZlYiyLBJL\nqbIrkU99SlmWmFHAUia6yrIouyI9kW12pSv7CQKWdJRlkVjpLrsSUZYldhSwlJHkLIuyK9IT+Vo6\nvzWDMsqySKyky65ElGWJFQUsZaR///5MHjYZgLrhdcquSM7ykV3JxeUoyyIllkl2JaIsS6woYCkz\na65Yw91/fzerL19d6qZIGSvVxoSrgc3h99E0aS2ILkWVaXYloixLbChgKUMfm/6xUjdBylipsiuR\nF+jYIPETdMw2imS6UJ1I1rLJrkSUZYkNBSwifUypsiuRawimQbcBd4XH5gPtdN7pWYGL5F1TU3bZ\nlciXvqQsSwwoYBHpQ0qdXYFg8O2y8PvoV8A+4Ft07PT8BeBIgsBFJG9mzAhWsb3zTrjyyszrrVgB\nSzRkvNQUsIj0ITNK3YBufIuOnZ5XE2RcrqbzIN1ovIuyL5Kz2bPhwgvh8cehIsNfgRUVwR5DyrKU\nlAIWkT5kLTC41I1IYT/BJoyJEqdCf4tgvMsn6Og2EsnJli1B91CmOzO3t0NjY1BPSqaq1A0QkeLa\nE369HXgM2AZsAlaVrEUdkv9+rSAY83ImQcACwbgXJ8i+nMmhQY5IWmPGwMqVsHt3t8Xa3Vnnzh53\nhgwZwqTRo/VXfgkpYBHpoy4JH5FVwA3A88AfS9KiQ7UTZFk+DkRDJaOgJsq+nF38ZklvMHVqypf2\ntrZy29at3Lx5M+tbWoKDLS1M2rOHy8aM4ZJRoxhcpV+fxaZgUUQAmAp8H/gDsBK4ETivpC0KGPDT\nLo5H2ReNKpB82tjSQkNjI19cu5YNUbASWt/SwpVr19LQ2MjGpNek8BQiisghpoaPywkyL1Hi/CXg\nDYJBr1nMseiRVAFJlH3JNMuyg9JP6ZZ429vayqzly1nf3Nzl5y46tr65mVnLl9PY0KBMSxEpwyIi\n3ZoKnBA+zgcuAuYBvyhlo0KZZlkS13cBzTKSrt2+dStrmpvT7o/VCqxpbubH27YVo
1kSUsAiIjl5\nP7AYuJMg2zK3BG1IzLKkEg3QbQu/LubQxekUwEi7Ozdt3py+YIKbNm2iXVOdi0a5LBHJ2eyk59cC\nzxF0G0EQLHwNeLmAbYiyLLPpesbQYjrWd2kkWJSuDfg8sB74BvCvwH3ABwrYTom3dc3NHQNsM+DA\nupYW1jU3UzdwYOEaJgcpYBGRvInGvkQ2E6ybUkjdjWVxgmCmkiBIqaBj8bk14ddvha+dTxDczKYj\n46IxL33Hnra2otaT7ClgEZGCGUMw42g38H/A6+HxPcB383idVFmWxOwKBMFNsn0J338BuBmYEz5/\nCDgLDdjtC4ZUVha1nmRPAYuIFFSUcTkh6fjnCJbgf08ertFVliU5u5KJNQRBSxTYfDX8/j0EXUYT\ngIY8tFfiZ1JNDROrq9nQ0pLRVHkDJlZXM6mmptBNk5AG3YpISUwlyGSsBJYmPJ4CBuRwPqPzjKEo\nu5Jtwn5twvdNwGV0dBnNAC7OoW0SfxVmXD5mTFZ1Lh87lgrTWsvFooBFREoqcdr0CQQZkf05nMcJ\nsixb6Jxd6ak1Sc/v5NAgaAedAx0pT5eMGkVdTU3arocqYEpNDRfX1hajWRIqyy4hM/s88CWgFlgO\nXObuf0pR9jTgd0mHHRjl7q8WtKEikrUZBNmRbcBWOpbk787hwDuAoQTjZh6i89iVfHKCLMtd4fMl\nwDnh8QUE3UhSngZXVfHotGnMWr6cNc3BJy+xeyjKpUyqqeGRadO0aFyRld3dNrMPAd8BPk2QQZ4H\nLDGzqe7+WopqTvCH3N6DBxSsiMRW8nTpbOQydiVbPwF+TJCi/ic6fqnNB76CUtflbFx1NY0NDdy+\ndSs3Je4lRDBm5fKxY7m4tlbBSgmU4x2fB/zQ3e8GMLPPEnSFXwxc10297e6+p5vXRaQXSJ4ZVAhR\nluXDwLMJx/cRZFmuKfD1pbAGV1Xxj+PGcfnYsaxrbmZPWxtDKiuZVFOjMSslVFZ/CJhZP4JB+o9G\nx9zdgUcIMsIpqwLPmtkWM3vYzE4ubEtFpBSi7EoxfrD9hCC7kmwhXU+f1mq65afCjLqBA6kfPJi6\ngQMVrJRYWQUswBEEmd5Xko6/QjCepStbgc8QLGL5fmAj8LiZTS9UI0WkNLYQZFe6ChjyzQlW9U0W\nZVkSdbWXkQIYkeyUY5dQVtx9FcGGs5GnzGwyQdfSRaVplYgUQuJCda+SnzVecrGQYN+iCg7dy6id\noA/bCbqvzipRG0XKTbkFLK8R/H8/Mun4SIJJBZlaCpySrtC8efMYOnRop2Nz585l7txSbPMmIplI\n3BpgJflbnC4biWNZkvcyuoyODNDnCf6aMrSarpSnRYsWsWjRok7Hdu/eXZBrmZfZTpNm9hTwtLtf\nET43gr3VbnL3f8/wHA8De9z9/BSv1wONjY2N1NfX56nlIlIqqwiyLssI+oeLYSDBFgQnEqy9EO1l\nlNxd9WB4fE74vTIuUu6amppoaGgAaHD3vI2BL7cMC8D1wJ1m1kjHtOaBBOs5YWYLgdHuflH4/AqC\nTVn/DFQDnwLeDZxZ9JaLSEkkbg9wOsXpMtpHsPFjur2MvgAMo6PL6Ew61vtQ1iVeWlpbuTmc6jyx\nuprLxoyhWtObi6bs7rS7329mRxDsCj+SYFbhbHffHhapBcYlVOlPsG7LaIKfIc8Bs9z9ieK1WkTi\nIrnL6DngDYLg4Mo8X+seguCjuzz2uoTvE/dDWgKcS8cGjFI6TXv2MOf559l24ECn41etX09tv378\n5m1vo37IkBK1ru8ouy6hYlCXkEjf9CRwagmvXwlMJ0gdH02wLcAUgsBKWZfSuH3LFj65alXacrdN\nncolo0cXoUXxpy4hEZECexdBcHA3XY/i/yWws4DXbyPIssynYw+j1QQDd88hyLporEvxNO3Zk1Gw\nAvDJVas4/rDDlGkpIAUsIiIJphIEDMk2A7cX4
fqVwLeSjl1OEEglTo9OHOsihTHn+eezKv+eF15g\ny8lal7RQFLCIiGQgWuNlMfB6N+Uc+E8OXd0yU20cugfSGoIgKnF6dDTWJVPqSspOS2vrIWNW0tn6\n5pu0tLZqIG6B6K6KSE7s69n/fe9fK+8xc1PpPGi3Kw8B3yzAtRfQsaFjJcEaL7PJLMuirqTs3bx5\nc871vjx+fJ5bI1B+S/OLSAzkEqz0pF65KOReRvvpyLxEY12WdFM+Wvo/eaXd8g4Ziydxl+Zi1JP0\nFLCISNaGM7yo9cpFMfcyirIsXQUgiXsXJa+0212QIx0mVlcXtZ6kp4BFRLK242u5bd2Xa71yEY1z\nWQo8UOBrpcqyJGZUvkoQ1FSGr1WQOsiRzi4bM6ao9SS9HgcsZjbEzM4zs2Pz0SARKQ/ZZkt6e3Yl\nMpVgRd05QKFXp+wqy5KYUWkKH1FXUjvKsmSquqqK2n79sqozqn9/DbgtoKwDFjO738y+EH5fAzwD\n3A88Z2YfyHP7RCSmss2W9PbsSleidV2WUpisS3KWJRpDU5myRkBZlsz85m1vy6r8A8cdV6CWCOSW\nYTmVYEFIgPcRDFIfRrBUwDV5apeIlIFMsyZ9JbvSlSjjEmVdEgOYpcDTBF1JuUrMskTZleRp0cmS\nsyx9L5TMTP2QIdw2Nd28sMBtU6dq0bgCyyVgGUrH5/ts4Bfuvg/4DcEq0iLSR2SaNemL2ZVUEgOY\nEwg2OcttAm0gyrIsJrPsSiQKchIH6MqhLhk9msb6+pTdQ6P696exvl7L8hdBLp1tG4F3mNkOgoDl\nw+Hx4YDmc4n0McMZzs5uFqzvy9mVTORjoncFQYp7TbqCCRKDHK2e2736IUPYesop2q25xHK5098F\nfgr8FXgJeDw8fiqQ3TrGIlL2dnxtR7frq2SbXVn1+ip2t+zOuPzQ6qFMPTyztH0cRTOLEt/xq8B7\nsjhHO0GwUkF2U6oTg5zk1XOjfzWtjgvt7qxrbmZPWxvvO/JIJtXUUGEK7Yot64DF3W81s6XAOOC3\n7h79/7EOjWER6ZNSZVmyza5s3rOZo285Ouvrb5q3iTFDync6aVfh1n8Cn+6mzo3AO8Lv/xf4R7Jf\n/yUxyElcPfdh4NywzEPADPpm4LK3tZXbtm7lpk2b2LB//8HjEwcM4PKxY7lk1CgGK8NSNDlNa3b3\nZ9z9V+7+14Rjv3H3P+avaSJSLlJlUbLNrowePJr6UfVUWGY/miqsgoZRDYwe3LvGDzjwA1KPR6kk\n2FF6Rvi4u5uy6URBTuJYmKvD4+3ApfTNMS4bW1o4/plnuHLt2k7BCsD6/fuZt3Ytxz/zDBu1sm3R\nZBQamtn1wL+4+xvh9ym5+5V5aZmIlJXkLEsuY1fMjPnvns+5956bvjDQ7u3MP2M+1svS84lrqXQl\ncTqzpymbjUoOHQuzNvyaOMZlBzAsoZtkSGVlr+om2dvayqnLlh0SqCRb29LCqcuW8dwJJyjTUgSZ\n3uHjgX4J36eiqf0ifVTyWJZcZwadXXc29aPqWb5tOW2eeoJupVUyvXY6syfPzuk6cZW4lkp305Oj\nLpx2sh+7kkobqQfuRgFSc2sr79+yheGbNrHzzTcPvj4pHITaG7pJvrd5c9pgJbJh/35u3bKFr7zl\nLQVulZi7YoxkZlYPNDY2NlJfX1/q5oiUFfu69XhX5odWP5RRluWhjzzE2XVnpy1XTh6iY/xInFQA\nU/fvZ+XTT+PtqcOjuupqHps+nXFluqdOuztH/OEP7GxLt5pNhxGVlWx/5zt7TYapp5qammhoaABo\ncPd8JQBzWun2yG5ey25ZQBHpdXoarEBHlqXSuh6ZUWmVNIxq6LXZlUx/MFcAxxEsPhctRPcUMKAA\nbWtvaeHFZ5/tNlgBWNPSwvF/+hNbynRsx5p9+7IKVgB2tLWxZt++ArVIIrkMun3ezOYkHzSzLxH8\n/yIi0iPRW
JZUXUJt3tYrx65ku9tzO/ACwdToaCG6SiCzzowsbdgAzc0ZFX29rY0pS5ey4o03CtGS\ngvpzjm1u+R70AAAgAElEQVTOtZ5kLpeOxuuBX5jZHcCVBLPd7gbeBlyQx7aJSB+WaixLbx27Al2v\nyZLOUDov7T+DYNDuti7KPkfwAzxr7jB6NGzr6qxd29fezrRnnmH5jBkcO2hQLlctiVcSxuUUo55k\nLpd1WK4zs98CPyH4/I8gyEi+3d0z/zSLiHQj1Yyh3ppdiSSvybKD7NdA6SqUc4JgJqcBumYwZAiM\nGAE7Mh9MfcCd6c88w+dGjWLBxIkMzHL341I4Msc25lpPMpfTOiwEA8lfACYAQ4D7FKyISL4lj2Xp\nrWNXUsnnPj/Zdjcdwh3Gj8+62pvu3LhlC4P++EeGPPEET+5MvY1DHAzJcYZTrvUkc7kMuj2FILMy\nBXg78DngZjO7z8y0aYiI5E3yWJbenl1J5ARrn0T7/PR0KHPU3bQ06fHdTE9gBj3MIuxtb+fU5cv5\nyurVtMd0hmr/itz+jh+uDEvB5fIv8xhwH3CSu69w99sI1mZ5C9pLSETyLMqyAH0qu5K4eFy0BkpP\nJe8UHa2Sm7HW1jy0Aq7bvJmRf/wj17/8MnvzdM58eTjHDNAwZVgKLpeA5Sx3/yd3PxAdcPe1wCnA\nD/PWMhERgizLgjMWUGmVLJi1oM9kV6LF46Bjkbh85ySibqKM5fGX8mutrXxx3Tre8tRTLH799Vhk\nXNrdufeVV7KuN2HAACbV1BSgRZIol0G3v09xvB34Zo9bJCKSZHbdbF798quMqOkbW/AlL82fuBR/\nPpfJi7qJngM2Ekz77NIrr8CmTcE4FrPga57sam3lnOef5/CqKr76lrfw6dGjS7ZS7rrm5oxXuE30\nkZEjtWhcEeT0qTCzQcBpBN1A/RNfc/eb8tAuEZFO+kqwkmpp/sTdlPP5q3EqHTOT5tAxpdqBC4FV\ngA8dCitXQppF43ri9dZWvrRuHV9dt457jj6a82trix4E7MlywbjI7BF947NZalkHLGZ2PPAgMBAY\nRDDr7ghgH/AqoIBFRCRHqTY+LFSWJVHilOrNBNkXAKqrYdo0WLasQFfucAD40MqVVKxcyVfGjeOr\n48f3KOPSnsUmjUMqc9vzelT//ukLSY/l8im4Afh/wGcJgvGTCD5j9wA35q9pIiJ9S7qNDwuVZenK\nIYvYDR3K9pNO4hsvvsjTu3YV+OrB9OuFGzdy+7Zt3HHMMUytqclqR+i9ra3ctnUrN2/ezPqEbQK6\n26TxyH79GFFVxY4sBgJPqq7W+JUiySVgmQ58xt3bzawNGODu68zsKuAu4Jd5baGISB+RKrsS6S7L\nkk0mIVPJi9hRXc2506ez8803uXTVKu577bW8DwRO9uqBA8x5PpiAmumO0BtbWpi1fDlruthKYH1L\nC1euXcutW7bw6LRpBzdpjOpkE6wYcPnYsRq/UiS5zBI6QMfaQ68SjGOBIBAfl49GiYj0Nckzg1JJ\nnjG0t7WVGzZupO7pp5mydCkNjY1MWbqUKU8/zXc3bizItOHh/fuz6Ljj2HnKKXx13DiOKtIaJOvC\nYKOhsZGNKTZX3Nvayqzly1nf3Ixz6Myq6Nj65mZmLV/O3tbWg3XWZbhXEgR/7U+pqeHi2toc341k\nK5eAZRnBFH6A3wPfMLOPEKw/9EK+GiYi0pdE2ZV0wz4TsywbW1poaGzki2vXsiHpF/j6DH6599TQ\nfv1YMHkyW08+mXuPOaYg10iWHGwku33rVtY0N5MuTGsF1jQ38+Nt27h+40ZWNzenvfeJJtXU8Mi0\naSWb0dQX5RKwXA1sDb//Z2An8H3gSODTeWqXiEifEWVXMv2BXAFc7c4ZWWYSCqXCjLm1tVw3cWLB\nrpGoFVgdBhuJ2t25afPmrM513csvc+1LL2VV5/CqKpbW1x/sTpLiyDpgcfd
n3P134fevuvvZ7j7E\n3RvcfXn+mygi0rtlu89PO7DMjDVtbVllEgrty+PH88S0aQzOcXn7bH1z/fpOC86ta25mfUtLxuNq\nHNiSwy7Lr7e2sv3AgfQFJa969Kkys38ys2H5aoyISF+Uap+fVI+n3Bn77LNYFr9sb9q0qSiryb5r\n+HD2nHoqb5xyChePHFnQa73e1saNCdmRXNdRyUUxryWBnna+XQ3cDxR+jpuISC92yIycbqxpbmZT\nFlOLnWDA6rrmZuoGDsyqXbnOPhrYrx+3H3ssX3rLWzjt2WcLlpG4csMGfrNrF3cfc0zO66jkopjX\nkkBPAxbN5RIRKbJc/7rPpl4u65h05dhBg1g7cya3bN7MNevXZ9ztlY1Hd+3iLU89xZPTpzN+wABe\nymF5/WzU9uuntVdKoDgdjSIikje5/nWfab18zz4aXFXFV8ePZ93MmQWbAt0GnPzss4wfMKAg50/0\nt8OHa+2VEsg6YDGzu8zs1PDp3wDZDa8WEZEemVRTw8Tq6oxT3EbmK7Lmso5JpsbX1LBm5kw+P3p0\nxnWy9cSePQU7d+Qfjjyy4NeQQ+WSYRkKPGJmq4GPAVo1R0SkiCrMuHzMmKzqZLoiay7rmGRjcFUV\nt0ydygszZjC4DLMUlcC5hx9e6mb0SblMaz6PYFD794EPARvM7CEzO9/MirPcoYhIH3fJqFHU1dSk\nHYiYzYqsuaxjkuvso7cedhibTzmFBRMnltVgyPcecQRVRZq2LZ3ldNfdfbu7X+/u04CZwBrgJ8AW\nM7vBzKbks5EiItLZ4KoqHp02jYk1NRiHzoCIjmWzImsu65hEs49yEY1tWT9zJrVFWt6/JyqBW+rq\nSt2MPqun67CMAs4MH23Ag8DbgL+Y2byeN09ERFIZV11NY0MD10+ezISkVVcnVldzQ10dzzQ0ZLwi\nazFmH3VlfE0Nq2bO5NJRo3p0nkL7/bRpjNbqtiWT9bTmsNvnvcAngLOA5wj2EbrX3feEZd4H/Bi4\nIX9NFZHeaNXrq9jdsjvj8kOrhzL18GxWLendBldV8Y/jxnH52LE93q250LOPujO4qorvHX00l40Z\nw1ufeaYg05974uwRIzhl+PBSN6NPy2Udlq0EmZlFwInu/mwXZX6HFpMTkTQ279nM0bccnXW9D7/1\nwwzqP4iJwybyz6f+cwFaVn4qzLJeFC5ZNPtoQ4bdQkaQycnnmiTHHHYYG046ibc+/TR7i7AybyYq\ngNunKkgutVwClnnAf7l7ygn47r4LKM4uWCJStkYPHk39qHqe3fYs7Z7539Q/+/PPDn4/bug4Lpx2\nYSGa1+dEs4+uXLs24zqZzj7Kxrjqal6cOZPjli5lZ3vpcy3nHX64uoJiIOuAxd1/UoiGiEjfY2bM\nf/d8zr333JzPMWX4FP60+U9dvqbuo+xdMmoUt27Zwvo0U5urCDIymcw+ysXo6mqWn3gipy5bxoYC\nr1ybzklDhpT0+hLo6dL8IiI9cnbd2dSPqmf5tuW0efaDN0++4+RuX980bxNjhmS3ZklfFs0+mrV8\nOWvC2T+JHTNRLiWb2Ue5GlddzXMnnMD1Gzdy7UulW6P0sizXvJHC0GRyESmpKMuSS7CSCY/JOIhy\nku/ZRz0xuKqKr02cyAszZhT8Wl0Z1b8/1QUMyiRz+lcQkZLraZYllbrhdcqu5Cifs4/y4a2HHcaf\n6us5oampqNd94Ljjino9Sa0sMyxm9nkzW29mzWb2lJmdkKb86WbWaGYtZrbKzC4qVltFJL1CZVlu\nOucmrAyXf4+TaPZR/eDB1A0cWNJN/44eOJCrxo0r2sq4P5oyhXqNX4mNsgtYzOxDwHeArwHHA8uB\nJWZ2RIryE4AHgEeBacCNwG1mdmYx2isimYmyLJXW8zU9IMiunF13dl7OJaUX7SD97xs3ZrwSb098\n7Mgj+aTGrsRK2QUsBNOqf+jud7v7i8B
ngX3AxSnKfw5Y5+5XuftKd/8e8PPwPCISE/nOsii70nsk\n7yBdaAbcrHVXYqesApZwld0GgmwJAB6MqHsEeEeKaieFryda0k15ESmRTLIsZ0w4g8nDJ3d7HmVX\nepdMd5DOl8+OGsXQMtjbqK8pq4AFOIJg/6lXko6/AqRaDKA2RfkhZjYgv80TkZ7IJMuy5CNLuOns\nm7o9j7IrvUcuO0j31CdjvqdRX1VuAYuI9HJRlgXgsH6HdXpt1sRZVFVVcc6Uc1JmWZRd6V2y3UE6\nLxTsxlK5TWt+jWBX6JFJx0cC21LU2Zai/B5373b5xHnz5jF06NBOx+bOncvcuXMzbrCIZMfMWHDG\nAubcO4f7PnAfc3425+Briy9YfLDMTWffxJxFcw6pr+xK79LTnaBzkY/NHPuKRYsWsWjRok7Hdu/O\nfDPTbFi5LapkZk8BT7v7FeFzA14GbnL3f++i/LeBc9x9WsKxe4Fh7t7leuBmVg80NjY2Ul9fX4i3\nISJp7GjewYiaEZx595k8sv4Rzpp0Fks+tuTg6+7OlJunsHZnx743dcPrWHXZKgUsvciaffuYsnRp\n0a43qbqa1TNnlnT6drlramqioaEBoMHd87ZwTjl2CV0PfMrMLjSzY4AfAAOBOwHMbKGZ3ZVQ/gfA\nJDP7NzM72swuBc4PzyMiMTWiZgQAv73wtzz6sUc7BSvQkWVJpOxKz7W7s2bfPpr27mXNvn20l/iP\n2mgH6WL9qxZiM0fJj3LrEsLd7w/XXPkGQdfOs8Bsd98eFqkFxiWU32Bmc4AbgMuBTcAl7p48c0hE\nYuqMSWd0efycKedQN6KONTvWMGXElLIdu7Lq9VXsbsk8jV6ITR33trZy29at3Lx5M+tbWg4en1Rd\nzWVjxnDJqFEF3TcolVx2kM7VEVVVBdvMUXqu7AIWAHe/Fbg1xWuf6OLYEwTToUWkFzEzbjnnFs79\n6bnccu4tZZld2bxnM0ffcnTW9fK5qePGlpZOmx0mWt/SwpVr13Lrli08Om1aUfYPSpbpDtI9NW/s\n2JIEZZKZcuwSEhE5aHbdbLZftZ2zJp9V6qbkZPTg0dSPqqfCMvtxXGEVNIxqYPTg0Xm5fvKibMkd\nQNGx9c3NzFq+nL2txVoNpUO0g/TEmpqCXuf9Rx5Z0PNLzyhgEZGyF413KUfR2jPt3p5R+XZvZ/4Z\n8/OWTcp0UbZWYE1zMz/elmpCZmFFO0jfMHkyIwqQBekPTB04MO/nlfxRwCIiUmKZ7qNUaZU0jGpg\n9uTZebluLouy3bRpU8kG4kY7SK+bOZPR/fvn9dwfr63VYNuYU8AiIlJime6j1OZtec2uZLsomwPr\nWlpYlzDWJdtZRenKZ3K+PW1t9MtzcPE5bXQYexpdJCISA1GWZfm25V0GLpVWyfTa6XnLrkDui7Lt\naWvLelZRuvIfPPJI7tu+Pe35ojE3m/Z3u+5nVsYNGMDbDzssfUEpKQUsIiIxEGVZzr23y/Us855d\ngdxXdH2jtZWGxsaMZxWlm4U0b+1arlq3jtYusinJ5/vF9u2syeOuzQZ8cdw4dQeVAXUJiYjERKqx\nLPkeuxLJdlE2AyYMGMAlq1ZlPKtoSxisdFce4IB7Ruf77qZNGb+/dCqBKTU1WnulTChgERGJiVRj\nWQqRXYGORdmyMX3w4KxmFX1+9eqMyqcTne+l/ft7nF2x8DG5poZHpk3T2itlQgGLiEiMJGdZCpVd\niVwyahR1NTVpxwdUAXXV1Szbuzer8z+wY0dxd1rOwMTqam6oq+OZhoaSLIQnuVHAIiISI8lZlkJl\nVyKJi7JFmYdO7Qkfk2pquP2YY7LKcDh0OS4lVz090xfHjmX1iSeyeuZMrtCqtmVHAYuISMxEWRag\noNmVSLQo2/WTJzMhKeOQmI0YlOMg3VKrAqbW1PC1CROoGzhQA2zLlMJLEZGYMTMWnLGAOffOYcGs\nBUX
ZIylalO3ysWNZ19zMnrY2hlRWMqmm5uAv+FxnFZXaJI1V6RX0ryciEkOz62bz6pdfLfq2AxVm\n1KVYoj6aVbQhw8XmDKg0y1u3kAHjq6upAja0tKQdyNvPjG9OmMClY8YoWOkF1CUkIhJTcdsjKZdZ\nRe8ZMSLjadOZ+MexY3ls+vSUY24iEwYMYPWJJ/KV8eMVrPQSClhERCRj2cwqmlJTw/emTMmofDrR\n+S6ure12zM2k6mq+W1fHcyecwPgC7+4sxaWwU0REMhbNKkpcuTaxwyfKeETjRkZXV6ct7wTdN1HX\nUXfni7IlmYy5kd5FGRYREclKprOKojVO0pX/bl0dG2bOzPh8iaIxN/WDB2sGUC9nXqJtwuPMzOqB\nxsbGRurr60vdHBGR2Gp3zyrDka58tueT+GlqaqKhoQGgwd2b8nVedQmJiEjOuptVlEv5bM8nfYe6\nhERERCT2FLCIiIhI7ClgERERkdhTwCIiIiKxp4BFREREYk8Bi4iIiMSeAhYRERGJPQUsIiIiEnsK\nWERERCT2FLCIiIhI7ClgERERkdhTwCIiIiKxp4BFREREYk8Bi4iIiMSeAhYRERGJPQUsIiIiEnsK\nWERERCT2FLCIiIhI7ClgERERkdhTwCIiIiKxp4BFREREYk8Bi4iIiMSeAhYRERGJPQUsIiIiEnsK\nWERERCT2FLCIiIhI7ClgERERkdhTwCIiIiKxp4BFREREYk8Bi0iSa7FSN0FERJIoYBFJEAUrClpE\nROJFAYuIiIjEXlkFLGY23Mx+ama7zWynmd1mZoPS1LnDzNqTHg8Wq81SPpKzKtcypkQtERGRZFWl\nbkCW7gVGArOA/sCdwA+Bj6ap9xDwcTj4G2l/YZon5SR9t8+WLoIYL1yDREQkpbIJWMzsGGA20ODu\ny8JjlwG/MbMvufu2bqrvd/ftxWiniJSnPWxmiLJqIrFVTl1C7wB2RsFK6BHAgZlp6p5uZq+Y2Ytm\ndquZjShYK0Wk7GzkKa5nLBt5qtRNEZEUyilgqQVeTTzg7m3AjvC1VB4CLgTOAK4CTgMeNDNNA+nj\nsu3eUXdQ7/UCPwPgz9xX4paISColD1jMbGEXg2ITH21mNjXX87v7/e7+gLv/2d1/DbwHOBE4PV/v\nQcpXpkGIgpXeq532g4HKC9yH699aJJbiMIblP4A70pRZB2wDjko8aGaVwIjwtYy4+3ozew2oA37X\nXdl58+YxdOjQTsfmzp3L3LlzM72clIFr8W4H4CpY6V3aaOUA+w4+30oTfw1/hPyVrbzEE9Ry/MHX\n+zGQylj8qBSJn0WLFrFo0aJOx3bv3l2Qa5l7efwwDgfd/hmYkTDo9izgQWBsmkG3iecZC7wE/L27\nP5CiTD3Q2NjYSH19fV7aL/GVySJxClp6jx9xEpt5utMxoxKn7eDXRGM5iU/yf8VsokhZa2pqoqGh\nAYJJMk35Om/Ju4Qy5e4vAkuAH5nZCWZ2CnAzsCgxWAkH1v59+P0gM7vOzGaa2XgzmwX8N7AqPJdI\nF0aXugFSQKfzNaoZ1ulYFKQkByvVDOM0/rVobROR1Motz3kBcAvB7KB24OfAFUllpgBRP04b8HaC\nQbfDgC0Egcq/uvuBYjRYyktiJkXL8/dOUziHS3mBX3ABL/FEynLjOZUPcK+mOovERFkFLO6+izSL\nxLl7ZcL3LcDZhW6X9AajuZbNnY4EY1vGEMS50psMYQwX8Rg/432s5sFOmRWjkqnM4UP8kgoquzmL\niBRTWQUsIoXQ3fiU5CBGeo8KKnmNFw8GK0YFTjtOG6/xooIVkZgpmzEsIiL59Cp/YQerDz4fwZSD\n37/OKrazohTNEpEUFLCISJ+0gl8AUEU1f8+P+QIreC+3U0U1AH8JXxeReFCXkIj0SbvZyEim8Q/c\nxxEcDUA9FzOOk/k5H2Y3L5e4hSKSSAGLiPRJ7+EHWPhfoiM5hs+yT
CveisSMAhYR6ZMquukR7yqQ\nEZHS0hgWERERiT0FLCIiIhJ7ClhEREQk9hSwiIiISOwpYBEREZHYU8AiIiIisaeARURERGJPAYuI\niIjEngIWERERiT0FLCIiIhJ7ClhEREQk9hSwiIiISOwpYBEREZHYU8AiIiIisaeARURERGJPAYuI\niIjEngIWERERiT0FLCIiIhJ7ClhEREQk9hSwiIiISOwpYBEREZHYU8AiIiIisaeARURERGJPAYuI\niIjEngIWERERiT0FLCIiIhJ7ClhEREQk9hSwiIiISOwpYBEREZHYU8AiIiIisaeARURERGJPAYuI\niIjEngIWERERiT0FLCIiIhJ7ClhEREQk9hSwiIiISOwpYBEREZHYU8AiIiIisaeARURERGJPAYuI\niIjEngIWERERiT0FLCIiIhJ7ClhEREQk9hSwiIiISOwpYBEREZHYK6uAxcyuNrM/mtkbZrYji3rf\nMLMtZrbPzH5rZnWFbKfkZtGiRaVuQp+je158uufFp3veO5RVwAL0A+4Hvp9pBTP7CvAF4NPAicAb\nwBIz61+QFkrO9EOl+HTPi0/3vPh0z3uHqlI3IBvu/nUAM7soi2pXAN909wfCuhcCrwDnEQQ/IiIi\nEnPllmHJiplNBGqBR6Nj7r4HeBp4R6naJSIiItnp1QELQbDiBBmVRK+Er4mIiEgZKHmXkJktBL7S\nTREHjnX3VUVqEkA1wIoVK4p4Sdm9ezdNTU2lbkafontefLrnxad7XlwJvzur83lec/d8ni/7Bpgd\nDhyeptg6d29NqHMRcIO7j0hz7onAWmC6uz+XcPxxYJm7z0tR7wLgp5m9AxEREenCR9z93nydrOQZ\nFnd/HXi9QOdeb2bbgFnAcwBmNgSYCXyvm6pLgI8AG4CWQrRNRESkl6oGJhD8Ls2bkgcs2TCzccAI\nYDxQaWbTwpfWuPsbYZkXga+4+/+Er30XuMbM1hAEIN8ENgH/QwphEJW3qFBERKSP+d98n7CsAhbg\nG8CFCc+jTsl3A0+E308BhkYF3P06MxsI/BAYBjwJnOPubxa+uSIiIpIPJR/DIiIiIpJOb5/WLCIi\nIr2AAhYRERGJPQUsgJmNN7PbzGxduEHiajO71sz6ZVBXGyvmKJfNLM3sDjNrT3o8WOi29hbaQLT4\nzGy4mf3UzHab2c7wZ82gNHX0Oc+CmX3ezNabWbOZPWVmJ6Qpf7qZNZpZi5mtynK7FyG7e25mp3Xx\neW4zs6OyuaYClsAxgAGfAv4GmAd8FvhWd5W0sWKPZb2ZZeghYCTBasW1wNw8t6s30waixXcvcCzB\n8gpzgFMJJgGko895BszsQ8B3gK8BxwPLCT6fR6QoPwF4gGDLlmnAjcBtZnZmMdrbG2R7z0NOMCkm\n+jyPcvdXs7qwu+vRxQP4EsF06e7KbAHmJTwfAjQDHyx1+8vpAVwE7Miw7B3AL0vd5nJ/ZHnP9TnP\n/T4fA7QDxyccmw20ArXd1NPnPPN7/BRwY8JzI1i64qoU5f8NeC7p2CLgwVK/l3J55HDPTwPagCE9\nua4yLKkNA1KmzLWxYkmdbmavmNmLZnarmXW74rHkTp/zHnsHsNPdlyUce4Tgr82Zaerqc55G2G3f\nQOfPpxPc41Sfz5PC1xMt6aa8JMjxnkMQ1Dwbdi0/bGYnZ3ttBSxdCPvnvwD8oJti2lixNB4iWIvn\nDOAqgsj9QTOzkraq99LnvGdqgU5pb3dvI/hjqLv7p895Zo4AKsnu81mbovwQMxuQ3+b1Srnc863A\nZ4APAO8HNgKPm9n0bC7cqwMWM1vYxUCf5EE/U5PqjCH4YXGfu/+4NC0vX7nc82y4+/3u/oC7/9nd\nfw28h2Bcxen5eg/lptD3XA6lz7lI5tx9lbv/yN2XuftT7n4JwUq4Xe7nl0q5rXSbrf8g6Avuzrro\nGzMbDTwG/MHdP5Om3jaCFNdIO
keaI4FlXdboG7K65z3lwX5RrwF1wO/ydd4yU8h7rs951zK959uA\nTjMhzKySYIuRbZleTJ/zlF4jGBsxMun4SFLf320pyu9x9/35bV6vlMs978pS4JRsLtyrAxbPYmPF\nMLPyGPAn4OIMzp3rxoq9Wjb3PB/MbCzBbt9bi3XNuCnkPdfnvGuZ3nMz+z9gmJkdnzCOZRZBEPh0\nptfT57xr7n7AzBoJ7umvAcJus1nATSmq/R9wTtKxs8LjkkaO97wr08n281zq0cZxeACjgdXAw+H3\nI6NHUrkXgb9PeH4VwQ+tvwPeBvx3eJ7+pX5P5fAAxhFMK/xXYHf4/TRgUFf3HBgEXEfwy3J8+D/I\nM8AKoF+p3085PLK95+Fzfc57ds8fDD+nJxD8RbkS+ElSGX3Oc7+/HwT2EYz5OYZgyvjrwJHh6wuB\nuxLKTwD2EswWOhq4FHgT+NtSv5dyeeRwz68A3gtMBt5KsCnxAeD0bK7bqzMsWTgTmBQ+NobHjGCw\nYWVCOW2smF/ZbmbZBrw9rDOMYLrtEuBf3f1AwVvbO2gD0eK7ALiFYBZFO/Bzgh/gifQ5z5G73x+u\n//ENgj80nwVmu/v2sEgtQaAeld9gZnOAG4DLCabjXuLuyTOHJIVs7znQn2DdltEEgc5zwCx3f4Is\naPNDERERib1ePUtIREREegcFLCIiIhJ7ClhEREQk9hSwiIiISOwpYBEREZHYU8AiIiIisaeARURE\nRGJPAYuIiIjEngIWEenVzOx3ZnZ9qdshIj2jlW5FpFczs2HAAXd/o9RtEZHcKWARERGR2FOXkIgU\nhZkdYWZbzeyfEo6dbGb7zezdKerMMLOHzWy7me0ys8fN7PiE108L65+ScOwqM9tmZkeGzzt1CZnZ\npWa2ysyaw3L3F+Ydi0g+KWARkaJw99eAi4Gvm1m9mR0G3A3c5O6/S1FtMHAncDIwE1gFPGhmg8Jz\n/p5g1917zGxwGMx8g2D33e3JJzOzGcCNwDXAVGA2HbtUi0iMqUtIRIrKzG4GzgSeAY4DTnD3AxnW\nrQB2AnPd/cHwWD/gKWB1eL4n3f1zCXV+Byxz9yvN7H3Aj4GxGtMiUl6UYRGRYvsyUAWcD1zg7gfM\nbJyZ7Q0fe6JuIzM7ysx+FHbh7AJ2A4OAt0QnC4OdjwIfAAYAV3Zz7d8CLwHrzexuM7vAzGoK8i5F\nJK+qSt0AEelz6oDRBH8wTQT+AmwBpiWU2RF+vRsYDlwGvAzsJ8im9E86ZzSGZUT42NzVhd39r2ZW\nD/hSrogAAAFaSURBVJwOnAV8HbjWzGa4+54evSsRKShlWESkaMLum58APwP+BbjdzI5w9zZ3X5fw\n2BVWOZlgjMsSd18BHACOSDrnZOB64JPA0wRBTkru3u7uj7n7PxEESROAM/L3LkWkEJRhEZFiWgAM\nIciY7APOBe4A/i5F+dXAx8ysERgKXBfWAw6OabkHeMjd7zKzJcBzZvYld/+P5JOZ2RxgEsFA253A\nHMCAlfl5eyJSKMqwiEhRmNlpwOXAR939DQ9G/F8IvNPMPpOi2sUEXUKNwF0EM3xeTXj9n4FxwGcB\n3H0b8Bngm2b2trBM4syCXcD7gUcJuqI+DXw4zN6ISIxplpCIiIjEnjIsIiIiEnsKWERERCT2FLCI\niIhI7ClgERERkdhTwCIiIiKxp4BFREREYk8Bi4iIiMSeAhYRERGJPQUsIiIiEnsKWERERCT2FLCI\niIhI7ClgERERkdj7/656HZGBGC46AAAAAElFTkSuQmCC\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "class kmeans_clustering:\n", + "\n", + " def __init__(self, file_1, 
file_2, file_3,file_4,file_5):\n", + " self.file1 = file_1\n", + " self.file2 = file_2\n", + " self.file3 = file_3\n", + " self.file4 = file_4\n", + " self.file5 = file_5\n", + " self.df = pd.DataFrame()\n", + " self.df_norm = pd.DataFrame()\n", + " print('----Initialization complete-------')\n", + "\n", + " def read_data(self):\n", + " f1 = open(self.file1, 'r')\n", + " f2 = open(self.file2, 'r')\n", + " f3 = open(self.file3, 'r')\n", + " f4 = open(self.file4, 'r')\n", + " f5 = open(self.file5, 'r')\n", + "\n", + "\n", + " reader = csv.reader(f1)\n", + " data1 = list(reader)\n", + "\n", + " reader = csv.reader(f2)\n", + " data2 = list(reader)\n", + "\n", + " reader = csv.reader(f3)\n", + " data3 = list(reader)\n", + " \n", + " reader = csv.reader(f4)\n", + " data4 = list(reader)\n", + " \n", + " reader = csv.reader(f5)\n", + " data5 = list(reader)\n", + "\n", + " print('data1 = ' + str(len(data1)))\n", + " print('data2 = ' + str(len(data2)))\n", + " print('data3 = ' + str(len(data3)))\n", + " print('data4 = ' + str(len(data4)))\n", + " print('data5 = ' + str(len(data5)))\n", + "\n", + "\n", + " data1.extend(data2)\n", + " print('data1 length after 1st merge = ' + str(len(data1)))\n", + "\n", + " data1.extend(data3)\n", + " print('data1 length after (subtract 19120, have spaces after scraping): ' + str(len(data1)))\n", + " \n", + " data1.extend(data4)\n", + " data1.extend(data5)\n", + " \n", + "\n", + " fa = [ ]\n", + " fb = [ ]\n", + " fc = [ ]\n", + " fd = [ ]\n", + " fe = [ ]\n", + "\n", + " for i in range(0, len(data1)):\n", + " if len(data1[i]) != 0:\n", + " fa.append(float(data1[i][2])) # feature1:prog-expense-ratio\n", + " fb.append(float(data1[i][3])) # feature2:asset-liability-ratio\n", + " fc.append(float(data1[i][4])) # feature3:working-capital-ratio\n", + " fd.append(float(data1[i][5])) # feature4:surplus-margin\n", + " fe.append(float(data1[i][6])) # feature5:total-amt\n", + "\t\t\n", + " dfa = pd.DataFrame(fa)\n", + " dfb = pd.DataFrame(fb)\n", + " 
dfc = pd.DataFrame(fc)\n", + " dfd = pd.DataFrame(fd)\n", + " dfe = pd.DataFrame(fe)\n", + "\n", + " self.df = pd.concat([dfa, dfb, dfc, dfd, dfe], axis = 1)\n", + "\t\t#print('dataframe df combined')\n", + "\t\t#print(self.df)\n", + "\t\n", + " def create_clusters(self):\n", + " array_norm = preprocessing.normalize(self.df)\n", + "\t\t#self.df_norm = pd.DataFrame(preprocessing.normalize(self.df))\n", + " \n", + " global norm_array\n", + " norm_array=array_norm\n", + " \n", + " \n", + " print('normalized dataframe')\n", + " print(pd.DataFrame(array_norm))\n", + " print('compute kmeans clusters')\n", + "\n", + " num = 2\n", + "\n", + "\t\t#Uncomment the following code if you want to evaluate best cluster#\n", + "# \t\t'''\n", + "# \t\tfor i in range(0,10):\n", + "# \t\t\tkmeans = KMeans(init='k-means++', n_clusters=num, n_init=10)\n", + "# \t\t\tkmeans.fit_predict(array_norm)\n", + "# \t\t\terror = kmeans.inertia_\n", + "# \t\t\t#print(\" Total error with \" + str(num) + \" clusters = \" + str(error))\n", + "# \t\t\tnum = num + 1\n", + "# \t\t\tscore = met.silhouette_score(array_norm, kmeans.labels_, metric='euclidean',sample_size=1000)\n", + "# \t\t\tprint('# clusters : ' + str(num) + 'silhoutte coefficent : ' + str(score))\n", + "# \t\t'''\n", + "\n", + "\t\t# Run kmeans on best clusters#\n", + "\t\t\n", + " kmeans = KMeans(init='k-means++', n_clusters=5, n_init=10)\n", + "\n", + " global k\n", + " k=kmeans.fit_predict(array_norm)\n", + " \n", + " global cluster_labels\n", + " cluster_labels = kmeans.labels_\n", + "\n", + " \n", + " global cluster_centers\n", + " cluster_centers=kmeans.cluster_centers_\n", + " print('-------------------------------------')\n", + " score = met.silhouette_score(array_norm, kmeans.labels_, metric='euclidean',sample_size=1000)\n", + " print('silhoutte coefficent : ' + str(score))\n", + "\n", + " #PCA to lower dimensionality of the data\n", + " pca_2 = PCA(2)\n", + " plot_columns = pca_2.fit_transform(array_norm)\n", + " \n", + " 
plt.xlabel(\"x-axis\")\n", + " plt.ylabel(\"y-axis\")\n", + " plt.title(\"K-means++ clustering\")\n", + "\n", + " i=0\n", + "\n", + " for sample in plot_columns:\n", + " if kmeans.labels_[i] == 0:\n", + " plt.scatter(sample[0],sample[1],color=\"c\",s=75,marker=\"o\")\n", + " if kmeans.labels_[i] == 1:\n", + " plt.scatter(sample[0],sample[1],s=75,marker=\"*\",color=\"chartreuse\")\n", + " if kmeans.labels_[i] == 2:\n", + " plt.scatter(sample[0],sample[1],color=\"green\",s=75,marker=\"v\")\n", + " if kmeans.labels_[i] == 3:\n", + " plt.scatter(sample[0],sample[1],color=\"cyan\",s=75,marker=\"^\")\n", + " if kmeans.labels_[i] == 4:\n", + " plt.scatter(sample[0],sample[1],color=\"red\",s=75,marker=\"^\")\n", + " i += 1\n", + " plt.show()\n", + "\n", + "\n", + "file_1 = 'team_out_1.txt'\n", + "file_2 = 'team_out_a2.txt'\n", + "file_3 = 'team_out_a3.txt'\n", + "file_4='team_out_Yash.txt'\n", + "file_5='team_out_Yash_part1.txt'\n", + "\n", + "k=[]\n", + "norm_array=[]\n", + "cluster_centers=[]\n", + "cluster_labels = []\n", + "class_instance = kmeans_clustering(file_1, file_2, file_3,file_4,file_5)\n", + "class_instance.read_data()\n", + "class_instance.create_clusters()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FilenameEINProgram_ExpLiabilities_To_AssetWorking_CapitalSurplus_MarginTotal_Expenses
31116201610419349301801.xml521221108.00.40000116883.315790-1.915329-9.796983167472.0
49876201641329349301804.xml232787307.01.0000002728.935484-234.9055560.000000360.0
70485201513299349300711.xml481252775.00.0000000.0000000.0000000.0087410.0
54408201610439349303016.xml561949970.01.0000000.0000001.0000000.0000001.0
2827201542589349300999.xml352090479.00.8574100.217362298.930677-12877.6666704818.0
\n", + "
" + ], + "text/plain": [ + " Filename EIN Program_Exp Liabilities_To_Asset \\\n", + "31116 201610419349301801.xml 521221108.0 0.400001 16883.315790 \n", + "49876 201641329349301804.xml 232787307.0 1.000000 2728.935484 \n", + "70485 201513299349300711.xml 481252775.0 0.000000 0.000000 \n", + "54408 201610439349303016.xml 561949970.0 1.000000 0.000000 \n", + "2827 201542589349300999.xml 352090479.0 0.857410 0.217362 \n", + "\n", + " Working_Capital Surplus_Margin Total_Expenses \n", + "31116 -1.915329 -9.796983 167472.0 \n", + "49876 -234.905556 0.000000 360.0 \n", + "70485 0.000000 0.008741 0.0 \n", + "54408 1.000000 0.000000 1.0 \n", + "2827 298.930677 -12877.666670 4818.0 " + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#OUTLIER ANALYSIS\n", + "from sklearn import preprocessing\n", + "\n", + "def distance(v1,centroid):\n", + " maxx = 0\n", + " for i,value in enumerate(v1):\n", + " temp1 = np.sqrt(np.sum((v1[i]-centroid)**2))\n", + " if(temp1>maxx):\n", + " maxx = temp1\n", + " max_val = []\n", + " max_val.append(value)\n", + " max_val.append(i)\n", + " max_val.append(temp1)\n", + " return max_val\n", + "\n", + "\n", + "cluster1 = np.where(cluster_labels==0)\n", + "cluster2 = np.where(cluster_labels==1)\n", + "cluster3 = np.where(cluster_labels==2)\n", + "cluster4 = np.where(cluster_labels==3)\n", + "cluster5 = np.where(cluster_labels==4)\n", + "\n", + "norm_df=df.copy()\n", + "norm_df=norm_df[['Program_Exp','Liabilities_To_Asset','Working_Capital','Surplus_Margin','Total_Expenses']]\n", + "x = norm_df.values #returns a numpy array\n", + "min_max_scaler = preprocessing.MinMaxScaler()\n", + "x_scaled = min_max_scaler.fit_transform(x)\n", + "norm_df = pd.DataFrame(x_scaled)\n", + "\n", + "outliers = pd.DataFrame()\n", + "outliers_index = []\n", + "\n", + "cluster1_entries = norm_df.loc[cluster1].as_matrix()\n", + "outlier_cluster1 = distance(cluster1_entries,cluster_centers[0])\n", + 
"outliers_index.append(cluster1[0][outlier_cluster1[1]])\n", + "\n", + "cluster2_entries = norm_df.loc[cluster2].as_matrix()\n", + "outlier_cluster2 = distance(cluster2_entries,cluster_centers[1])\n", + "outliers_index.append(cluster2[0][outlier_cluster2[1]])\n", + "\n", + "cluster3_entries = norm_df.loc[cluster3].as_matrix()\n", + "outlier_cluster3 = distance(cluster3_entries,cluster_centers[2])\n", + "outliers_index.append(cluster3[0][outlier_cluster3[1]])\n", + "\n", + "cluster4_entries = norm_df.loc[cluster4].as_matrix()\n", + "outlier_cluster4 = distance(cluster4_entries,cluster_centers[3])\n", + "outliers_index.append(cluster4[0][outlier_cluster4[1]])\n", + "\n", + "cluster5_entries = norm_df.loc[cluster5].as_matrix()\n", + "outlier_cluster5 = distance(cluster5_entries,cluster_centers[4])\n", + "outliers_index.append(cluster5[0][outlier_cluster5[1]])\n", + "\n", + "df.loc[outliers_index]\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Results\n", + "\n", + "These 5 businesses are the outliers for each of their clusters, they can all be seen as financially inefficient because their metrics are lacking in some aspects. For instance the first two have very high liabilities to assets ratio but low working capital ratio. 
On the other hand, the last row has a very high working capital ratio but a low, strongly negative surplus margin." + ] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/OneClassSVM.ipynb b/OneClassSVM.ipynb new file mode 100644 index 0000000..b987ced --- /dev/null +++ b/OneClassSVM.ipynb @@ -0,0 +1,2806 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## One Class SVM Classification\n", + "\n", + "Classifies data as financially efficient or inefficient given training data consisting only of efficient nonprofits. \n", + "\n", + "It defines a boundary based on the training data and classifies the data as positive (efficient) or negative (inefficient).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FilenameEINProgram_ExpLiabilities_To_AssetWorking_CapitalSurplus_MarginTotal_Expenses
0201523229349300327.xml510311790.00.9896190.0918021.574677-0.078663464318.0
1201543089349301829.xml261460932.00.9653780.0000003.910347-0.04267427439.0
2201533179349306298.xml270609504.00.9422760.0492060.6551520.088597384828.0
3201533209349304768.xml521548962.00.3845260.00081430.3927560.63312650912.0
4201533179349307343.xml731653383.00.6205600.0294871.0837390.10590128123.0
5201533189349300608.xml237324566.00.7589020.736982-0.176215-0.003914608126.0
6201523069349301367.xml43259150.00.7363120.1819980.3785920.057534212968.0
7201533069349300963.xml621273871.00.6448220.23424714.4131630.4961221426413.0
8201523099349300542.xml541897455.00.8456590.1385310.414986-0.016769461512.0
9201533099349301033.xml251869168.00.7623570.0000008.6739840.122244102044.0
10201523169349304367.xml376046335.00.7453860.00000016.4639760.25257937170.0
11201533099349301803.xml50454409.00.9152610.0000001.1587680.062649557347.0
12201523069349300142.xml272364809.00.9203070.0036060.240597-0.029663302061.0
13201543039349301204.xml463812139.00.6640880.6484130.0458230.088812139974.0
14201523089349301462.xml561797737.00.7046230.4326710.000000-0.050384214773.0
15201533069349300788.xml590638867.00.0000000.0000000.360157-0.055883762522.0
16201533079349300238.xml273314429.01.0000000.0000000.0000000.10758426171.0
17201523209349314257.xml710801566.01.0000000.0000000.000000-8.2123371145333.0
18201523209349311332.xml752204594.00.8968950.0697150.535678-0.0641003889691.0
19201533179349302173.xml251374594.00.0000000.3136300.3375370.014809423512.0
20201533179349307048.xml450448773.00.9912050.0513480.4149300.161552185894.0
21201533209349302633.xml363789851.00.0000000.2320110.521133-0.1289977329143.0
22201533099349301113.xml411712804.00.4776970.0098870.751388-0.014507814720.0
23201523039349300127.xml381557861.00.8321590.0638342.9881200.1709111369609.0
24201523079349301652.xml273741627.00.9451200.0000000.8636630.0949851234798.0
25201533039349300813.xml411495321.00.6681610.040796144.508488-0.1397771108143.0
26201533139349300208.xml60666277.00.0000000.6449040.654848-0.29614319649432.0
27201533069349301413.xml42616064.00.9512180.3386830.1785170.04112333254203.0
28201533079349300003.xml581651220.00.9103470.0268320.945357-0.020293324797.0
29201523069349300957.xml341496171.00.8506170.0688381.1257120.212899672775.0
........................
75744201542929349301039.xml205158717.00.4905450.0000000.4963500.00000052195.0
75745201542929349301104.xml440296401.00.0000000.0012552.673444-0.0754043921954.0
75746201542929349301204.xml131084330.00.0000000.9499500.043382-0.07956349480300.0
75747201503109349302210.xml30228267.00.7268960.0484910.4849470.0592591275474.0
75748201503109349302270.xml741718905.00.0000000.4443490.536678-0.021724700826.0
75749201522549349300117.xml150249365.00.0000000.0000002.6042911.82803554487.0
75750201522549349300127.xml942763918.00.0000000.1268736.226369-0.16031861868.0
75751201522549349300142.xml453774366.00.0000000.0068781.2217410.337512387277.0
75752201522549349300202.xml860507921.00.7510380.2561691.011925-0.0459283732567.0
75753201522549349300217.xml236390816.00.0000000.12808519.697597-2.047494227081.0
75754201522549349300247.xml940689854.01.0000000.5621910.151220-0.047202291609.0
75755201522549349300317.xml830164620.00.0000000.0090785.4866830.127495188778.0
75756201522549349300322.xml274726130.00.9915140.0037921.6514610.3928731796484.0
75757201503139349301280.xml810677279.00.5646010.0166954.3389490.52988885951.0
75758201503139349301285.xml232090256.00.7677431.176003-0.257737-0.06542516050928.0
75759201503139349301295.xml202902396.00.0099010.0041411704.0891090.979933808.0
75760201503139349301300.xml222471909.00.9408471.457766-1.7937190.013922469665.0
75761201503139349301315.xml50359008.00.6341990.0067123.5537220.199138179469.0
75762201503139349301335.xml20530732.00.7964640.8742270.345519-0.024264691125.0
75763201503139349301400.xml60668594.00.8589080.1833480.809719-0.11480228331141.0
75764201503139349301405.xml570884504.00.9607110.2026144.412177-0.36056057650.0
75765201513159349303976.xml911075950.00.9127920.1128526.9786760.0233831188586.0
75766201513159349303991.xml450537391.00.8828611.0000000.0000000.00000033214334.0
75767201513159349304006.xml630985623.01.0000000.1641631.3504700.070756201749.0
75768201513159349304021.xml930854620.00.2212010.00012114.6056220.07542662260.0
75769201513159349304046.xml263218152.00.6618380.1630340.730298-0.017285508851.0
75770201513159349304051.xml520887806.00.8192480.4175530.5373070.0287305894235.0
75771201513159349304061.xml942608741.00.8937040.0099071.6385780.044649243668.0
75772201513159349304071.xml330841281.00.8078120.3422690.8360170.29750525594615.0
75773201513159349304076.xml570751500.00.8877860.4155708.5715010.0634481610096.0
\n", + "

75279 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " Filename EIN Program_Exp Liabilities_To_Asset \\\n", + "0 201523229349300327.xml 510311790.0 0.989619 0.091802 \n", + "1 201543089349301829.xml 261460932.0 0.965378 0.000000 \n", + "2 201533179349306298.xml 270609504.0 0.942276 0.049206 \n", + "3 201533209349304768.xml 521548962.0 0.384526 0.000814 \n", + "4 201533179349307343.xml 731653383.0 0.620560 0.029487 \n", + "5 201533189349300608.xml 237324566.0 0.758902 0.736982 \n", + "6 201523069349301367.xml 43259150.0 0.736312 0.181998 \n", + "7 201533069349300963.xml 621273871.0 0.644822 0.234247 \n", + "8 201523099349300542.xml 541897455.0 0.845659 0.138531 \n", + "9 201533099349301033.xml 251869168.0 0.762357 0.000000 \n", + "10 201523169349304367.xml 376046335.0 0.745386 0.000000 \n", + "11 201533099349301803.xml 50454409.0 0.915261 0.000000 \n", + "12 201523069349300142.xml 272364809.0 0.920307 0.003606 \n", + "13 201543039349301204.xml 463812139.0 0.664088 0.648413 \n", + "14 201523089349301462.xml 561797737.0 0.704623 0.432671 \n", + "15 201533069349300788.xml 590638867.0 0.000000 0.000000 \n", + "16 201533079349300238.xml 273314429.0 1.000000 0.000000 \n", + "17 201523209349314257.xml 710801566.0 1.000000 0.000000 \n", + "18 201523209349311332.xml 752204594.0 0.896895 0.069715 \n", + "19 201533179349302173.xml 251374594.0 0.000000 0.313630 \n", + "20 201533179349307048.xml 450448773.0 0.991205 0.051348 \n", + "21 201533209349302633.xml 363789851.0 0.000000 0.232011 \n", + "22 201533099349301113.xml 411712804.0 0.477697 0.009887 \n", + "23 201523039349300127.xml 381557861.0 0.832159 0.063834 \n", + "24 201523079349301652.xml 273741627.0 0.945120 0.000000 \n", + "25 201533039349300813.xml 411495321.0 0.668161 0.040796 \n", + "26 201533139349300208.xml 60666277.0 0.000000 0.644904 \n", + "27 201533069349301413.xml 42616064.0 0.951218 0.338683 \n", + "28 201533079349300003.xml 581651220.0 0.910347 0.026832 \n", + "29 201523069349300957.xml 341496171.0 0.850617 0.068838 \n", + "... ... 
... ... ... \n", + "75744 201542929349301039.xml 205158717.0 0.490545 0.000000 \n", + "75745 201542929349301104.xml 440296401.0 0.000000 0.001255 \n", + "75746 201542929349301204.xml 131084330.0 0.000000 0.949950 \n", + "75747 201503109349302210.xml 30228267.0 0.726896 0.048491 \n", + "75748 201503109349302270.xml 741718905.0 0.000000 0.444349 \n", + "75749 201522549349300117.xml 150249365.0 0.000000 0.000000 \n", + "75750 201522549349300127.xml 942763918.0 0.000000 0.126873 \n", + "75751 201522549349300142.xml 453774366.0 0.000000 0.006878 \n", + "75752 201522549349300202.xml 860507921.0 0.751038 0.256169 \n", + "75753 201522549349300217.xml 236390816.0 0.000000 0.128085 \n", + "75754 201522549349300247.xml 940689854.0 1.000000 0.562191 \n", + "75755 201522549349300317.xml 830164620.0 0.000000 0.009078 \n", + "75756 201522549349300322.xml 274726130.0 0.991514 0.003792 \n", + "75757 201503139349301280.xml 810677279.0 0.564601 0.016695 \n", + "75758 201503139349301285.xml 232090256.0 0.767743 1.176003 \n", + "75759 201503139349301295.xml 202902396.0 0.009901 0.004141 \n", + "75760 201503139349301300.xml 222471909.0 0.940847 1.457766 \n", + "75761 201503139349301315.xml 50359008.0 0.634199 0.006712 \n", + "75762 201503139349301335.xml 20530732.0 0.796464 0.874227 \n", + "75763 201503139349301400.xml 60668594.0 0.858908 0.183348 \n", + "75764 201503139349301405.xml 570884504.0 0.960711 0.202614 \n", + "75765 201513159349303976.xml 911075950.0 0.912792 0.112852 \n", + "75766 201513159349303991.xml 450537391.0 0.882861 1.000000 \n", + "75767 201513159349304006.xml 630985623.0 1.000000 0.164163 \n", + "75768 201513159349304021.xml 930854620.0 0.221201 0.000121 \n", + "75769 201513159349304046.xml 263218152.0 0.661838 0.163034 \n", + "75770 201513159349304051.xml 520887806.0 0.819248 0.417553 \n", + "75771 201513159349304061.xml 942608741.0 0.893704 0.009907 \n", + "75772 201513159349304071.xml 330841281.0 0.807812 0.342269 \n", + "75773 201513159349304076.xml 570751500.0 
0.887786 0.415570 \n", + "\n", + " Working_Capital Surplus_Margin Total_Expenses \n", + "0 1.574677 -0.078663 464318.0 \n", + "1 3.910347 -0.042674 27439.0 \n", + "2 0.655152 0.088597 384828.0 \n", + "3 30.392756 0.633126 50912.0 \n", + "4 1.083739 0.105901 28123.0 \n", + "5 -0.176215 -0.003914 608126.0 \n", + "6 0.378592 0.057534 212968.0 \n", + "7 14.413163 0.496122 1426413.0 \n", + "8 0.414986 -0.016769 461512.0 \n", + "9 8.673984 0.122244 102044.0 \n", + "10 16.463976 0.252579 37170.0 \n", + "11 1.158768 0.062649 557347.0 \n", + "12 0.240597 -0.029663 302061.0 \n", + "13 0.045823 0.088812 139974.0 \n", + "14 0.000000 -0.050384 214773.0 \n", + "15 0.360157 -0.055883 762522.0 \n", + "16 0.000000 0.107584 26171.0 \n", + "17 0.000000 -8.212337 1145333.0 \n", + "18 0.535678 -0.064100 3889691.0 \n", + "19 0.337537 0.014809 423512.0 \n", + "20 0.414930 0.161552 185894.0 \n", + "21 0.521133 -0.128997 7329143.0 \n", + "22 0.751388 -0.014507 814720.0 \n", + "23 2.988120 0.170911 1369609.0 \n", + "24 0.863663 0.094985 1234798.0 \n", + "25 144.508488 -0.139777 1108143.0 \n", + "26 0.654848 -0.296143 19649432.0 \n", + "27 0.178517 0.041123 33254203.0 \n", + "28 0.945357 -0.020293 324797.0 \n", + "29 1.125712 0.212899 672775.0 \n", + "... ... ... ... 
\n", + "75744 0.496350 0.000000 52195.0 \n", + "75745 2.673444 -0.075404 3921954.0 \n", + "75746 0.043382 -0.079563 49480300.0 \n", + "75747 0.484947 0.059259 1275474.0 \n", + "75748 0.536678 -0.021724 700826.0 \n", + "75749 2.604291 1.828035 54487.0 \n", + "75750 6.226369 -0.160318 61868.0 \n", + "75751 1.221741 0.337512 387277.0 \n", + "75752 1.011925 -0.045928 3732567.0 \n", + "75753 19.697597 -2.047494 227081.0 \n", + "75754 0.151220 -0.047202 291609.0 \n", + "75755 5.486683 0.127495 188778.0 \n", + "75756 1.651461 0.392873 1796484.0 \n", + "75757 4.338949 0.529888 85951.0 \n", + "75758 -0.257737 -0.065425 16050928.0 \n", + "75759 1704.089109 0.979933 808.0 \n", + "75760 -1.793719 0.013922 469665.0 \n", + "75761 3.553722 0.199138 179469.0 \n", + "75762 0.345519 -0.024264 691125.0 \n", + "75763 0.809719 -0.114802 28331141.0 \n", + "75764 4.412177 -0.360560 57650.0 \n", + "75765 6.978676 0.023383 1188586.0 \n", + "75766 0.000000 0.000000 33214334.0 \n", + "75767 1.350470 0.070756 201749.0 \n", + "75768 14.605622 0.075426 62260.0 \n", + "75769 0.730298 -0.017285 508851.0 \n", + "75770 0.537307 0.028730 5894235.0 \n", + "75771 1.638578 0.044649 243668.0 \n", + "75772 0.836017 0.297505 25594615.0 \n", + "75773 8.571501 0.063448 1610096.0 \n", + "\n", + "[75279 rows x 7 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "\n", + "df1=pd.read_csv('team_out_1.csv')\n", + "df2=pd.read_csv('team_out_a2.csv')\n", + "df3=pd.read_csv('team_out_a3.csv')\n", + "df4=pd.read_csv('team_out_Yash.csv')\n", + "df5=pd.read_csv('team_out_Yash_part1.csv')\n", + "\n", + "\n", + "df=df1.append(df2)\n", + "df=df.append(df3)\n", + "df=df.append(df4)\n", + "df=df.append(df5)\n", + "\n", + "\n", + "df.dropna(inplace=True)\n", + "df.reset_index(inplace=True,drop=True)\n", + "df=df[df.Total_Expenses>0]\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 634, + "metadata": { + 
"collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FilenameEINProgram_ExpLiabilities_To_AssetWorking_CapitalSurplus_MarginTotal_Expenses
1545201502579349301175.xml200141248.01.1426070.7703540.553360-5.0892401276722.0
5627201512949349301306.xml542014609.01.0131641.069741-0.011601-0.2762761574232.0
7960201503359349300815.xml273294817.01.0102400.0207530.8577450.4241865355301.0
9800201620279349300617.xml550576920.01.0064030.4346110.0731140.0136691822040.0
13111201600949349300200.xml10367116.01.0036340.0284781.075774-0.1178471179223.0
16056201620629349300447.xml542014609.01.0131641.069741-0.011601-0.2762761574232.0
16692201620419349300712.xml311238139.01.0537810.15353834.644571-6.7661962024744.0
20638201503169349304480.xml363414823.09.9821220.9628540.224504-3.0084202118845.0
\n", + "
" + ], + "text/plain": [ + " Filename EIN Program_Exp Liabilities_To_Asset \\\n", + "1545 201502579349301175.xml 200141248.0 1.142607 0.770354 \n", + "5627 201512949349301306.xml 542014609.0 1.013164 1.069741 \n", + "7960 201503359349300815.xml 273294817.0 1.010240 0.020753 \n", + "9800 201620279349300617.xml 550576920.0 1.006403 0.434611 \n", + "13111 201600949349300200.xml 10367116.0 1.003634 0.028478 \n", + "16056 201620629349300447.xml 542014609.0 1.013164 1.069741 \n", + "16692 201620419349300712.xml 311238139.0 1.053781 0.153538 \n", + "20638 201503169349304480.xml 363414823.0 9.982122 0.962854 \n", + "\n", + " Working_Capital Surplus_Margin Total_Expenses \n", + "1545 0.553360 -5.089240 1276722.0 \n", + "5627 -0.011601 -0.276276 1574232.0 \n", + "7960 0.857745 0.424186 5355301.0 \n", + "9800 0.073114 0.013669 1822040.0 \n", + "13111 1.075774 -0.117847 1179223.0 \n", + "16056 -0.011601 -0.276276 1574232.0 \n", + "16692 34.644571 -6.766196 2024744.0 \n", + "20638 0.224504 -3.008420 2118845.0 " + ] + }, + "execution_count": 634, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.Program_Exp>1]" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/araeyusvakil/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:4: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n", + "/Users/araeyusvakil/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:7: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n" + ] + } + ], + "source": [ + "small_df=df[df.Total_Expenses<1000000]\n", + "\n", + "med_df=df[df.Total_Expenses>1000000]\n", + "med_df=med_df[df.Total_Expenses<10000000]\n", + "\n", + "large_df=df[df.Total_Expenses<50000000]\n", + "large_df=large_df[df.Total_Expenses>10000000]\n", + "\n", + "national_df=df[df.Total_Expenses>50000000]\n", + 
"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "6054\n", + "%: 0.7381896266930955\n", + "NUMBER OF POSITIVE: 4469\n" + ] + } + ], + "source": [ + "print(len(large_df))\n", + "\n", + "pos_med=large_df[large_df.Program_Exp>.75]\n", + "# pos_med=pos_med[pos_med.Working_Capital>.01]\n", + "pos_med=pos_med[pos_med.Liabilities_To_Asset<1]\n", + "# pos_med=pos_med[pos_med.Surplus_Margin>.01]\n", + "\n", + "lst_temp=list(pos_med['EIN'])\n", + "print(\"%:\",len(lst_temp)/len(large_df))\n", + "print(\"NUMBER OF POSITIVE: \",len(lst_temp))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "NUMBER OF MED: 23589\n", + "%: 0.3620755436856162\n", + "NUMBER OF POSITIVE: 8541\n" + ] + } + ], + "source": [ + "print(\"NUMBER OF MED: \",len(med_df))\n", + "\n", + "pos_med=med_df[med_df.Program_Exp>.75]\n", + "pos_med=pos_med[pos_med.Working_Capital>.01]\n", + "pos_med=pos_med[pos_med.Liabilities_To_Asset<1]\n", + "pos_med=pos_med[pos_med.Surplus_Margin>.01]\n", + "# \n", + "\n", + "lst_temp=list(pos_med['EIN'])\n", + "print(\"%:\",len(lst_temp)/len(med_df))\n", + "print(\"NUMBER OF POSITIVE: \",len(lst_temp))" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2203\n", + "%: 0.7330912392192465\n", + "1615\n" + ] + } + ], + "source": [ + "eff_nat_df=national_df[national_df.Program_Exp>.8]\n", + "# eff_nat_df=eff_nat_df[eff_nat_df.Working_Capital>.1]\n", + "eff_nat_df=eff_nat_df[eff_nat_df.Liabilities_To_Asset<1]\n", + "# eff_nat_df=eff_nat_df[eff_nat_df.Surplus_Margin>.1]\n", + "\n", + "print(len(national_df))\n", + "lst_temp=list(eff_nat_df['EIN'])\n", + 
"print(\"%:\",len(lst_temp)/len(national_df))\n", + "print(len(lst_temp))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "NUMBER OF SMALL: 43433\n", + "%: 0.6999516496673036\n", + "NUMBER OF POSITIVE: 30401\n" + ] + } + ], + "source": [ + "print(\"NUMBER OF SMALL: \",len(small_df))\n", + "\n", + "pos_small=small_df[small_df.Program_Exp>.5]\n", + "# pos_small=pos_small[pos_small.Working_Capital>.5]\n", + "pos_small=pos_small[pos_small.Liabilities_To_Asset<.5]\n", + "# pos_small=pos_small[pos_small.Surplus_Margin>.2]\n", + "lst_temp=list(pos_small['EIN'])\n", + "\n", + "print(\"%:\",len(lst_temp)/len(small_df))\n", + "print(\"NUMBER OF POSITIVE: \",len(lst_temp))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "df=national_df #CHANGE THIS TO REQUIRED SIZE AND RUN REST OF THE CODE AS IS" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Program_ExpLiabilities_To_AssetWorking_CapitalSurplus_MarginTotal_ExpensesFilename
EIN
620497990.00.7116970.2859760.1219120.2076800.000182201533179349305948.xml
621674308.00.8761230.2857620.0950810.2070960.002472201533179349300543.xml
440579850.00.8533950.2871350.0953320.1995720.014706201523209349310187.xml
112625096.00.8259310.2864350.0843350.2055990.072800201543149349303884.xml
510216589.00.7016150.2876470.0925810.1943940.160460201523209349313822.xml
660258919.00.9071940.2866440.0950350.2067510.000918201523209349311307.xml
953766170.00.7296600.2865310.0922810.2062880.002804201523209349311657.xml
411404075.00.8653710.2854680.1254500.2090510.021943201533089349300748.xml
314390844.00.8598510.2855640.1867930.2127970.001308201533179349301758.xml
370635502.00.8626930.2861920.1022200.2129470.018294201533179349302053.xml
941156621.00.9026870.2869260.0939360.2135290.165744201533089349301428.xml
941080917.00.8440000.2867240.0913940.2068270.056671201513089349301681.xml
741287016.00.8527220.2852990.6485530.2469490.001583201513079349301421.xml
131628168.00.7052330.2856660.2292960.2163430.001423201503089349300305.xml
381359218.00.9099010.2859110.0915070.2064010.000569201503039349300645.xml
582155150.00.8571060.2865430.0951170.1992660.003023201503039349300205.xml
620636239.00.9674900.2859820.0933310.2031990.013353201513099349300131.xml
462134675.00.7483920.2856380.0954880.2167000.000467201503039349300965.xml
382776791.00.8474990.2861680.0969740.1928450.016498201503039349301010.xml
990109908.00.8859650.2860600.1325210.1619090.000196201543149349301234.xml
344428218.00.7784570.2867490.1128240.1987900.016734201533169349301178.xml
900532831.00.8660600.2885670.0776720.2054850.011010201503069349300100.xml
381359083.00.9049100.2856750.1365280.2036970.002179201503079349301720.xml
530240474.00.0000000.2868980.1140320.1906010.000289201503099349301210.xml
133783732.00.9731190.2870390.0814890.2064710.396097201523149349303117.xml
840772672.00.9592780.2855660.0878440.2044810.003671201523169349304487.xml
161533232.00.8917790.2873470.0862870.2029100.104674201523169349304557.xml
521209124.00.0000000.2880960.0755350.1897200.000026201513089349300006.xml
160968914.00.8365070.2872770.0828810.1989030.001402201533159349302003.xml
203238867.00.9368550.2878540.0783820.2050980.008632201513079349300746.xml
.....................
131921358.00.7843740.2859120.1499280.2151000.000855201503169349302405.xml
131740122.00.8099400.2866920.0972390.2116350.028923201522949349300642.xml
470662290.00.9337240.2863210.1282060.2155040.010230201503109349301775.xml
237315673.00.8746730.2852890.7802240.2314970.004367201513139349303256.xml
590594631.00.8479080.2854390.1298520.2141010.005300201513139349303411.xml
860098923.00.8461560.2862350.1089200.2074300.022347201513159349303116.xml
60646813.00.0000000.2871230.0783380.2187870.064180201542939349300119.xml
420698265.00.8700260.2861410.1128280.2114660.016394201503169349303520.xml
610523304.00.8356050.2860910.1126720.2081840.000074201512589349300416.xml
680480736.00.9599800.2877580.0789760.2052790.017427201512599349300816.xml
640723407.00.9904480.2877340.0938310.2078080.000400201503169349305790.xml
350867958.00.8328010.2865000.1032670.2146740.026676201513169349301926.xml
250965270.00.7888300.2878690.0786290.2021150.002136201513139349302081.xml
461056754.00.0000000.2873570.1375520.2192470.021543201513149349301616.xml
540853898.00.8221070.2854740.1143670.2028190.016090201503159349301525.xml
311551316.00.0000000.2882680.0714560.2065480.003117201513209349310951.xml
237385560.00.0000000.2857970.1024170.2169950.081787201523209349303667.xml
950743320.00.0000000.2852660.2509770.2205680.001429201523209349306452.xml
221750190.00.8030190.2864260.1121780.2066730.019195201542929349301029.xml
112458584.00.8855440.2874410.0835500.2036410.001476201502939349301250.xml
363992031.00.0000000.2878810.0783380.2050980.009181201503149349302640.xml
953797687.00.9057370.2854640.1671910.2281750.013651201513159349303306.xml
251469002.00.9246330.2856700.1430260.2027550.000475201503169349305045.xml
111672777.00.7879680.2873010.0936740.2138400.000704201513149349302391.xml
941606519.00.9425710.2861950.0872110.2041670.005626201503139349301485.xml
396084300.00.0000000.2852370.2255810.2115530.004633201503169349303705.xml
231857015.00.9090180.2876270.0778210.1935430.015721201513159349300131.xml
410698301.00.8817890.2863250.1090580.2095350.005620201503159349302510.xml
310929576.00.7225800.2861660.1063840.2118530.007642201513109349301611.xml
350868085.00.8233890.2868230.0951990.2048610.067859201513109349300736.xml
\n", + "

2203 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " Program_Exp Liabilities_To_Asset Working_Capital \\\n", + "EIN \n", + "620497990.0 0.711697 0.285976 0.121912 \n", + "621674308.0 0.876123 0.285762 0.095081 \n", + "440579850.0 0.853395 0.287135 0.095332 \n", + "112625096.0 0.825931 0.286435 0.084335 \n", + "510216589.0 0.701615 0.287647 0.092581 \n", + "660258919.0 0.907194 0.286644 0.095035 \n", + "953766170.0 0.729660 0.286531 0.092281 \n", + "411404075.0 0.865371 0.285468 0.125450 \n", + "314390844.0 0.859851 0.285564 0.186793 \n", + "370635502.0 0.862693 0.286192 0.102220 \n", + "941156621.0 0.902687 0.286926 0.093936 \n", + "941080917.0 0.844000 0.286724 0.091394 \n", + "741287016.0 0.852722 0.285299 0.648553 \n", + "131628168.0 0.705233 0.285666 0.229296 \n", + "381359218.0 0.909901 0.285911 0.091507 \n", + "582155150.0 0.857106 0.286543 0.095117 \n", + "620636239.0 0.967490 0.285982 0.093331 \n", + "462134675.0 0.748392 0.285638 0.095488 \n", + "382776791.0 0.847499 0.286168 0.096974 \n", + "990109908.0 0.885965 0.286060 0.132521 \n", + "344428218.0 0.778457 0.286749 0.112824 \n", + "900532831.0 0.866060 0.288567 0.077672 \n", + "381359083.0 0.904910 0.285675 0.136528 \n", + "530240474.0 0.000000 0.286898 0.114032 \n", + "133783732.0 0.973119 0.287039 0.081489 \n", + "840772672.0 0.959278 0.285566 0.087844 \n", + "161533232.0 0.891779 0.287347 0.086287 \n", + "521209124.0 0.000000 0.288096 0.075535 \n", + "160968914.0 0.836507 0.287277 0.082881 \n", + "203238867.0 0.936855 0.287854 0.078382 \n", + "... ... ... ... 
\n", + "131921358.0 0.784374 0.285912 0.149928 \n", + "131740122.0 0.809940 0.286692 0.097239 \n", + "470662290.0 0.933724 0.286321 0.128206 \n", + "237315673.0 0.874673 0.285289 0.780224 \n", + "590594631.0 0.847908 0.285439 0.129852 \n", + "860098923.0 0.846156 0.286235 0.108920 \n", + "60646813.0 0.000000 0.287123 0.078338 \n", + "420698265.0 0.870026 0.286141 0.112828 \n", + "610523304.0 0.835605 0.286091 0.112672 \n", + "680480736.0 0.959980 0.287758 0.078976 \n", + "640723407.0 0.990448 0.287734 0.093831 \n", + "350867958.0 0.832801 0.286500 0.103267 \n", + "250965270.0 0.788830 0.287869 0.078629 \n", + "461056754.0 0.000000 0.287357 0.137552 \n", + "540853898.0 0.822107 0.285474 0.114367 \n", + "311551316.0 0.000000 0.288268 0.071456 \n", + "237385560.0 0.000000 0.285797 0.102417 \n", + "950743320.0 0.000000 0.285266 0.250977 \n", + "221750190.0 0.803019 0.286426 0.112178 \n", + "112458584.0 0.885544 0.287441 0.083550 \n", + "363992031.0 0.000000 0.287881 0.078338 \n", + "953797687.0 0.905737 0.285464 0.167191 \n", + "251469002.0 0.924633 0.285670 0.143026 \n", + "111672777.0 0.787968 0.287301 0.093674 \n", + "941606519.0 0.942571 0.286195 0.087211 \n", + "396084300.0 0.000000 0.285237 0.225581 \n", + "231857015.0 0.909018 0.287627 0.077821 \n", + "410698301.0 0.881789 0.286325 0.109058 \n", + "310929576.0 0.722580 0.286166 0.106384 \n", + "350868085.0 0.823389 0.286823 0.095199 \n", + "\n", + " Surplus_Margin Total_Expenses Filename \n", + "EIN \n", + "620497990.0 0.207680 0.000182 201533179349305948.xml \n", + "621674308.0 0.207096 0.002472 201533179349300543.xml \n", + "440579850.0 0.199572 0.014706 201523209349310187.xml \n", + "112625096.0 0.205599 0.072800 201543149349303884.xml \n", + "510216589.0 0.194394 0.160460 201523209349313822.xml \n", + "660258919.0 0.206751 0.000918 201523209349311307.xml \n", + "953766170.0 0.206288 0.002804 201523209349311657.xml \n", + "411404075.0 0.209051 0.021943 201533089349300748.xml \n", + "314390844.0 0.212797 
0.001308 201533179349301758.xml \n", + "370635502.0 0.212947 0.018294 201533179349302053.xml \n", + "941156621.0 0.213529 0.165744 201533089349301428.xml \n", + "941080917.0 0.206827 0.056671 201513089349301681.xml \n", + "741287016.0 0.246949 0.001583 201513079349301421.xml \n", + "131628168.0 0.216343 0.001423 201503089349300305.xml \n", + "381359218.0 0.206401 0.000569 201503039349300645.xml \n", + "582155150.0 0.199266 0.003023 201503039349300205.xml \n", + "620636239.0 0.203199 0.013353 201513099349300131.xml \n", + "462134675.0 0.216700 0.000467 201503039349300965.xml \n", + "382776791.0 0.192845 0.016498 201503039349301010.xml \n", + "990109908.0 0.161909 0.000196 201543149349301234.xml \n", + "344428218.0 0.198790 0.016734 201533169349301178.xml \n", + "900532831.0 0.205485 0.011010 201503069349300100.xml \n", + "381359083.0 0.203697 0.002179 201503079349301720.xml \n", + "530240474.0 0.190601 0.000289 201503099349301210.xml \n", + "133783732.0 0.206471 0.396097 201523149349303117.xml \n", + "840772672.0 0.204481 0.003671 201523169349304487.xml \n", + "161533232.0 0.202910 0.104674 201523169349304557.xml \n", + "521209124.0 0.189720 0.000026 201513089349300006.xml \n", + "160968914.0 0.198903 0.001402 201533159349302003.xml \n", + "203238867.0 0.205098 0.008632 201513079349300746.xml \n", + "... ... ... ... 
\n", + "131921358.0 0.215100 0.000855 201503169349302405.xml \n", + "131740122.0 0.211635 0.028923 201522949349300642.xml \n", + "470662290.0 0.215504 0.010230 201503109349301775.xml \n", + "237315673.0 0.231497 0.004367 201513139349303256.xml \n", + "590594631.0 0.214101 0.005300 201513139349303411.xml \n", + "860098923.0 0.207430 0.022347 201513159349303116.xml \n", + "60646813.0 0.218787 0.064180 201542939349300119.xml \n", + "420698265.0 0.211466 0.016394 201503169349303520.xml \n", + "610523304.0 0.208184 0.000074 201512589349300416.xml \n", + "680480736.0 0.205279 0.017427 201512599349300816.xml \n", + "640723407.0 0.207808 0.000400 201503169349305790.xml \n", + "350867958.0 0.214674 0.026676 201513169349301926.xml \n", + "250965270.0 0.202115 0.002136 201513139349302081.xml \n", + "461056754.0 0.219247 0.021543 201513149349301616.xml \n", + "540853898.0 0.202819 0.016090 201503159349301525.xml \n", + "311551316.0 0.206548 0.003117 201513209349310951.xml \n", + "237385560.0 0.216995 0.081787 201523209349303667.xml \n", + "950743320.0 0.220568 0.001429 201523209349306452.xml \n", + "221750190.0 0.206673 0.019195 201542929349301029.xml \n", + "112458584.0 0.203641 0.001476 201502939349301250.xml \n", + "363992031.0 0.205098 0.009181 201503149349302640.xml \n", + "953797687.0 0.228175 0.013651 201513159349303306.xml \n", + "251469002.0 0.202755 0.000475 201503169349305045.xml \n", + "111672777.0 0.213840 0.000704 201513149349302391.xml \n", + "941606519.0 0.204167 0.005626 201503139349301485.xml \n", + "396084300.0 0.211553 0.004633 201503169349303705.xml \n", + "231857015.0 0.193543 0.015721 201513159349300131.xml \n", + "410698301.0 0.209535 0.005620 201503159349302510.xml \n", + "310929576.0 0.211853 0.007642 201513109349301611.xml \n", + "350868085.0 0.204861 0.067859 201513109349300736.xml \n", + "\n", + "[2203 rows x 6 columns]" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"df.reset_index(drop=True,inplace=True)\n", + "norm_df=df.copy()\n", + "\n", + "norm_df=norm_df[['Program_Exp','Liabilities_To_Asset','Working_Capital','Surplus_Margin','Total_Expenses']]\n", + "\n", + "from sklearn import preprocessing\n", + "\n", + "x = norm_df.values #returns a numpy array\n", + "min_max_scaler = preprocessing.MinMaxScaler()\n", + "x_scaled = min_max_scaler.fit_transform(x)\n", + "norm_df = pd.DataFrame(x_scaled)\n", + "norm_df[\"Filename\"]=df['Filename']\n", + "\n", + "norm_df[\"EIN\"]=df['EIN']\n", + "norm_df.columns=['Program_Exp','Liabilities_To_Asset','Working_Capital','Surplus_Margin','Total_Expenses','Filename','EIN']\n", + "norm_df.set_index('EIN',inplace=True)\n", + "norm_df" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[-1. 1. 1. ..., 1. -1. 1.]\n", + "(1101, 2)\n", + "(1101, 2) (1101, 2)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/araeyusvakil/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + " app.launch_new_instance()\n", + "/Users/araeyusvakil/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:7: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n" + ] + } + ], + "source": [ + "Y_class_df = pd.DataFrame()\n", + "X_class_df=norm_df.loc[lst_temp]\n", + "X_class_df['Efficiency'] = 1\n", + "\n", + "\n", + "Y_class_df['Efficiency'] = 
X_class_df['Efficiency'] \n", + "X_class_df.drop('Efficiency', axis=1, inplace=True)\n", + "\n", + "\n", + "new_df=norm_df[['Program_Exp','Liabilities_To_Asset','Working_Capital','Surplus_Margin']]\n", + "X_class_df=X_class_df[['Program_Exp','Liabilities_To_Asset','Working_Capital','Surplus_Margin']]\n", + "# X_class_df=X_class_df.drop(X_class_df.index[2]) #OUTLIER REMOVER\n", + "X_class_df.reset_index(inplace=True,drop=True)\n", + "\n", + "%matplotlib inline\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.font_manager\n", + "from sklearn import svm\n", + "from scipy import stats\n", + "from mpl_toolkits.mplot3d import Axes3D\n", + "\n", + "from sklearn.decomposition import TruncatedSVD\n", + "\n", + "svd = TruncatedSVD(n_components=2, n_iter=7)\n", + "reduced_df = svd.fit_transform(new_df)\n", + "\n", + "#svd2 = TruncatedSVD(n_components=2, n_iter=7)\n", + "X_classtrain_df = svd.fit_transform(X_class_df)\n", + "\n", + "\n", + "'''outliers_fraction = 0.25'''\n", + "#colors = ['m', 'g', 'b']\n", + "clf = svm.OneClassSVM(nu=0.1, kernel=\"linear\", gamma=.01,coef0=1.5)\n", + "clf.fit(X_classtrain_df)\n", + "#y_pred_test = clf.predict(normalized_df)\n", + "#print(y_pred_test)\n", + "Z1 = clf.predict(reduced_df)\n", + "res_matrix=Z1\n", + "print(res_matrix)\n", + "#print(reduced_df.shape)\n", + "\n", + "if Z1.shape[0]%2==1:\n", + " Z1=Z1[:-1]\n", + "temp_Z1=Z1\n", + "Z1 = Z1.reshape((-1,2))\n", + "print(Z1.shape)\n", + "xx1 = []\n", + "yy1= []\n", + "for i in reduced_df:\n", + " xx1.append(i[0])\n", + " yy1.append(i[1])\n", + "x1 = np.asarray(xx1)\n", + "y1 = np.asarray(yy1)\n", + "temp_y1=y1\n", + "temp_x1=x1\n", + "if len(x1)%2==1: #IS ODD:\n", + " x1=x1[:-1]\n", + "if len(y1)%2==1:\n", + " y1=y1[:-1]\n", + "x1 = x1.reshape((-1,2))\n", + "y1 = y1.reshape((-1,2))\n", + "print(x1.shape,y1.shape)\n", + "\n", + "# plt.figure(0)\n", + "# plt.contourf(reduced_df[0:Z1.shape[0]], reduced_df[Z1.shape[0]:len(res_matrix)-1], 
Z1,cmap=plt.cm.coolwarm)\n", + "# plt.figure(1)\n", + "# plt.contourf(x1, y1, Z1)\n", + "\n", + "# fig = plt.figure()\n", + "# ax = fig.add_subplot(111, projection='3d')\n", + "# ax.contourf(x1,y1,Z1)\n", + "\n", + "# fig = plt.figure()\n", + "# ax = fig.add_subplot(111, projection='3d')\n", + "# ax.contour(reduced_df[0:Z1.shape[0]], reduced_df[Z1.shape[0]:len(res_matrix)-1], Z1,cmap=plt.cm.coolwarm)\n", + "\n", + "# fig = plt.figure()\n", + "# ax = fig.add_subplot(111, projection='3d')\n", + "# ax.plot_trisurf(temp_x1,temp_y1, temp_Z1,cmap=plt.cm.coolwarm)\n", + "# # #plt.scatter(X_class_df.as_matrix()[:, 0], X_class_df.as_matrix()[:, 1], color='black')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "623\n", + "0.28279618701770315\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FilenameEINProgram_ExpLiabilities_To_AssetWorking_CapitalSurplus_MarginTotal_Expenses
0201533179349305948.xml6.20498e+080.7185280.2864031.057050.02902965.19174e+07
1201523209349313822.xml5.10217e+080.7083490.9125010.345514-0.1203471.71046e+09
2201523209349311657.xml9.53766e+080.7366630.4943160.3382490.01338217.90507e+07
3201503089349300305.xml1.31628e+080.7120020.1700643.662070.1264296.4757e+07
4201503039349300965.xml4.62135e+080.7555750.1597230.4160340.1304445.48697e+07
5201533169349301178.xml3.44428e+080.7859290.5758370.836588-0.07091812.23198e+08
6201503099349301210.xml5.3024e+0800.6319530.865902-0.1629975.30245e+07
7201513089349300006.xml5.21209e+0801.0808-0.0679868-0.1729035.03023e+07
8201503099349300710.xml3.11668e+080.7201450.3843550.7014840.0577912.10115e+08
9201513099349301401.xml2.22325e+080.7540580.3843880.943859-0.1382721.78473e+08
10201503079349301165.xml5.30215e+0800.2112011.334580.1775531.51622e+08
11201523209349309137.xml5.91093e+080.7258630.2527721.40097-0.008954691.01897e+08
12201533169349302458.xml7.1025e+080.8096370.350220.7746750.04324841.43938e+08
13201533169349301973.xml9.30603e+080.7540090.3925760.8159020.09452096.09807e+08
14201533169349304798.xml3.8149e+080.7697760.6115980.265935-0.01146395.74491e+07
15201523149349301217.xml5.36e+0801.5614-0.262408-0.1048656.15463e+07
16201532579349301178.xml9.10567e+080.7097240.4833380.328049-0.01335924.01986e+08
17201512879349300501.xml1.3174e+080.7628180.8642610.140138-0.4161017.25797e+07
18201532579349300048.xml3.70661e+080.81332700-0.2184776.60125e+07
19201512999349300926.xml4.11294e+080.7179650.7553060.154674-0.1139581.06079e+08
20201522879349300742.xml7.41031e+080.7767010.417793.761260.3591466.6327e+07
21201512869349301651.xml9.51644e+080.7546320.04505123.573310.1840167.30474e+07
22201513029349301001.xml3.40754e+0800.1759731.08541-0.02140256.6673e+07
23201503019349300920.xml5.302e+0800.6009120.09579890.08664922.65447e+08
24201513029349301666.xml5.70752e+080.914972-106.882-0.308261-0.01892461.21089e+08
25201542999349301319.xml5.91378e+08003.55804-0.1255476.12861e+07
26201532879349300843.xml5.22001e+080.6611611.30181-1.14255-0.1373086.65748e+07
27201532759349300858.xml6.10582e+0700.6229610.170884-0.02349455.50081e+07
28201502759349300960.xml8.40399e+080.7903030.07232721.811020.09236659.13628e+07
29201502549349300340.xml2.0233e+080.719910.4580810.148249-0.0177625.85896e+07
........................
593201533209349302483.xml2.26106e+080.8149690.4293150.6783360.009554118.51476e+07
594201533209349302408.xml7.50055e+0800.7977820.09471920.008448421.20344e+08
595201533209349302513.xml3.40715e+080.8202030.2194010.511917-0.05350564.48244e+08
596201543179349304149.xml1.31628e+0800.3682250.4025650.1055381.41531e+09
597201523209349302112.xml1.31624e+080.7151790.3724680.4025570.05019371.85365e+08
598201523209349302152.xml6.11649e+080.8133360.1025261.037730.1087443.5946e+09
599201513209349313236.xml9.42736e+080.7893570.7416442.06452-0.1029847.83482e+07
600201523179349306007.xml2.51494e+080.7819120.5850040.469064-0.06279356.3717e+07
601201523209349302462.xml5.60928e+080.8133370.0531221.010910.05879951.06149e+09
602201513209349311296.xml9.11915e+080.7958970.358230.429839-0.01988832.59222e+08
603201523179349303937.xml1.3174e+080.787550.9896470.004172530.004125622.15966e+08
604201522939349300957.xml8.50313e+080.6862830.69690.150211-0.05097055.50494e+07
605201512959349300826.xml3.91028e+080.8128470.5202390.555575-0.02042222.02017e+08
606201523209349300522.xml5.21572e+080.7917240.2525990.604276-0.07124811.01498e+08
607201523179349303117.xml7.52679e+080.810856-0.1025140.8128380.1768051.97308e+08
608201503109349302150.xml3.51968e+080.8180640.4677830.6927420.2043415.2751e+07
609201503109349301980.xml2.50818e+0800.9129270.1508720.1612033.97704e+08
610201503169349302405.xml1.31921e+080.7919030.2624111.736690.1124635.88801e+07
611201522949349300642.xml1.3174e+080.8177140.5544760.4585230.07349683.49322e+08
612201542939349300119.xml6.06468e+0700.71610900.1539077.14162e+08
613201513139349302081.xml2.50965e+080.7964010.9957830.00705749-0.03353747.21422e+07
614201513149349301616.xml4.61057e+0800.8037811.436460.1590812.72957e+08
615201513209349310951.xml3.11551e+0801.14522-0.1669490.0163068.22896e+07
616201523209349303667.xml2.37386e+0800.2190720.5841310.133768.96363e+08
617201523209349306452.xml9.50743e+0800.02005644.188020.1739396.48187e+07
618201542929349301029.xml2.2175e+080.8107270.4548940.8209080.01770892.48663e+08
619201503149349302640.xml3.63992e+0801001.4504e+08
620201513149349302391.xml1.11673e+080.795530.7827780.3720430.09829565.73239e+07
621201503169349303705.xml3.96084e+0800.009418913.571950.07257259.79746e+07
622201513109349301611.xml3.1093e+080.7295150.3576130.6803610.07595161.29112e+08
\n", + "

623 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " Filename EIN Program_Exp Liabilities_To_Asset \\\n", + "0 201533179349305948.xml 6.20498e+08 0.718528 0.286403 \n", + "1 201523209349313822.xml 5.10217e+08 0.708349 0.912501 \n", + "2 201523209349311657.xml 9.53766e+08 0.736663 0.494316 \n", + "3 201503089349300305.xml 1.31628e+08 0.712002 0.170064 \n", + "4 201503039349300965.xml 4.62135e+08 0.755575 0.159723 \n", + "5 201533169349301178.xml 3.44428e+08 0.785929 0.575837 \n", + "6 201503099349301210.xml 5.3024e+08 0 0.631953 \n", + "7 201513089349300006.xml 5.21209e+08 0 1.0808 \n", + "8 201503099349300710.xml 3.11668e+08 0.720145 0.384355 \n", + "9 201513099349301401.xml 2.22325e+08 0.754058 0.384388 \n", + "10 201503079349301165.xml 5.30215e+08 0 0.211201 \n", + "11 201523209349309137.xml 5.91093e+08 0.725863 0.252772 \n", + "12 201533169349302458.xml 7.1025e+08 0.809637 0.35022 \n", + "13 201533169349301973.xml 9.30603e+08 0.754009 0.392576 \n", + "14 201533169349304798.xml 3.8149e+08 0.769776 0.611598 \n", + "15 201523149349301217.xml 5.36e+08 0 1.5614 \n", + "16 201532579349301178.xml 9.10567e+08 0.709724 0.483338 \n", + "17 201512879349300501.xml 1.3174e+08 0.762818 0.864261 \n", + "18 201532579349300048.xml 3.70661e+08 0.813327 0 \n", + "19 201512999349300926.xml 4.11294e+08 0.717965 0.755306 \n", + "20 201522879349300742.xml 7.41031e+08 0.776701 0.41779 \n", + "21 201512869349301651.xml 9.51644e+08 0.754632 0.0450512 \n", + "22 201513029349301001.xml 3.40754e+08 0 0.175973 \n", + "23 201503019349300920.xml 5.302e+08 0 0.600912 \n", + "24 201513029349301666.xml 5.70752e+08 0.914972 -106.882 \n", + "25 201542999349301319.xml 5.91378e+08 0 0 \n", + "26 201532879349300843.xml 5.22001e+08 0.661161 1.30181 \n", + "27 201532759349300858.xml 6.10582e+07 0 0.622961 \n", + "28 201502759349300960.xml 8.40399e+08 0.790303 0.0723272 \n", + "29 201502549349300340.xml 2.0233e+08 0.71991 0.458081 \n", + ".. ... ... ... ... 
\n", + "593 201533209349302483.xml 2.26106e+08 0.814969 0.429315 \n", + "594 201533209349302408.xml 7.50055e+08 0 0.797782 \n", + "595 201533209349302513.xml 3.40715e+08 0.820203 0.219401 \n", + "596 201543179349304149.xml 1.31628e+08 0 0.368225 \n", + "597 201523209349302112.xml 1.31624e+08 0.715179 0.372468 \n", + "598 201523209349302152.xml 6.11649e+08 0.813336 0.102526 \n", + "599 201513209349313236.xml 9.42736e+08 0.789357 0.741644 \n", + "600 201523179349306007.xml 2.51494e+08 0.781912 0.585004 \n", + "601 201523209349302462.xml 5.60928e+08 0.813337 0.053122 \n", + "602 201513209349311296.xml 9.11915e+08 0.795897 0.35823 \n", + "603 201523179349303937.xml 1.3174e+08 0.78755 0.989647 \n", + "604 201522939349300957.xml 8.50313e+08 0.686283 0.6969 \n", + "605 201512959349300826.xml 3.91028e+08 0.812847 0.520239 \n", + "606 201523209349300522.xml 5.21572e+08 0.791724 0.252599 \n", + "607 201523179349303117.xml 7.52679e+08 0.810856 -0.102514 \n", + "608 201503109349302150.xml 3.51968e+08 0.818064 0.467783 \n", + "609 201503109349301980.xml 2.50818e+08 0 0.912927 \n", + "610 201503169349302405.xml 1.31921e+08 0.791903 0.262411 \n", + "611 201522949349300642.xml 1.3174e+08 0.817714 0.554476 \n", + "612 201542939349300119.xml 6.06468e+07 0 0.716109 \n", + "613 201513139349302081.xml 2.50965e+08 0.796401 0.995783 \n", + "614 201513149349301616.xml 4.61057e+08 0 0.803781 \n", + "615 201513209349310951.xml 3.11551e+08 0 1.14522 \n", + "616 201523209349303667.xml 2.37386e+08 0 0.219072 \n", + "617 201523209349306452.xml 9.50743e+08 0 0.0200564 \n", + "618 201542929349301029.xml 2.2175e+08 0.810727 0.454894 \n", + "619 201503149349302640.xml 3.63992e+08 0 1 \n", + "620 201513149349302391.xml 1.11673e+08 0.79553 0.782778 \n", + "621 201503169349303705.xml 3.96084e+08 0 0.00941891 \n", + "622 201513109349301611.xml 3.1093e+08 0.729515 0.357613 \n", + "\n", + " Working_Capital Surplus_Margin Total_Expenses \n", + "0 1.05705 0.0290296 5.19174e+07 \n", + "1 0.345514 -0.120347 
1.71046e+09 \n", + "2 0.338249 0.0133821 7.90507e+07 \n", + "3 3.66207 0.126429 6.4757e+07 \n", + "4 0.416034 0.130444 5.48697e+07 \n", + "5 0.836588 -0.0709181 2.23198e+08 \n", + "6 0.865902 -0.162997 5.30245e+07 \n", + "7 -0.0679868 -0.172903 5.03023e+07 \n", + "8 0.701484 0.057791 2.10115e+08 \n", + "9 0.943859 -0.138272 1.78473e+08 \n", + "10 1.33458 0.177553 1.51622e+08 \n", + "11 1.40097 -0.00895469 1.01897e+08 \n", + "12 0.774675 0.0432484 1.43938e+08 \n", + "13 0.815902 0.0945209 6.09807e+08 \n", + "14 0.265935 -0.0114639 5.74491e+07 \n", + "15 -0.262408 -0.104865 6.15463e+07 \n", + "16 0.328049 -0.0133592 4.01986e+08 \n", + "17 0.140138 -0.416101 7.25797e+07 \n", + "18 0 -0.218477 6.60125e+07 \n", + "19 0.154674 -0.113958 1.06079e+08 \n", + "20 3.76126 0.359146 6.6327e+07 \n", + "21 3.57331 0.184016 7.30474e+07 \n", + "22 1.08541 -0.0214025 6.6673e+07 \n", + "23 0.0957989 0.0866492 2.65447e+08 \n", + "24 -0.308261 -0.0189246 1.21089e+08 \n", + "25 3.55804 -0.125547 6.12861e+07 \n", + "26 -1.14255 -0.137308 6.65748e+07 \n", + "27 0.170884 -0.0234945 5.50081e+07 \n", + "28 1.81102 0.0923665 9.13628e+07 \n", + "29 0.148249 -0.017762 5.85896e+07 \n", + ".. ... ... ... 
\n", + "593 0.678336 0.00955411 8.51476e+07 \n", + "594 0.0947192 0.00844842 1.20344e+08 \n", + "595 0.511917 -0.0535056 4.48244e+08 \n", + "596 0.402565 0.105538 1.41531e+09 \n", + "597 0.402557 0.0501937 1.85365e+08 \n", + "598 1.03773 0.108744 3.5946e+09 \n", + "599 2.06452 -0.102984 7.83482e+07 \n", + "600 0.469064 -0.0627935 6.3717e+07 \n", + "601 1.01091 0.0587995 1.06149e+09 \n", + "602 0.429839 -0.0198883 2.59222e+08 \n", + "603 0.00417253 0.00412562 2.15966e+08 \n", + "604 0.150211 -0.0509705 5.50494e+07 \n", + "605 0.555575 -0.0204222 2.02017e+08 \n", + "606 0.604276 -0.0712481 1.01498e+08 \n", + "607 0.812838 0.176805 1.97308e+08 \n", + "608 0.692742 0.204341 5.2751e+07 \n", + "609 0.150872 0.161203 3.97704e+08 \n", + "610 1.73669 0.112463 5.88801e+07 \n", + "611 0.458523 0.0734968 3.49322e+08 \n", + "612 0 0.153907 7.14162e+08 \n", + "613 0.00705749 -0.0335374 7.21422e+07 \n", + "614 1.43646 0.159081 2.72957e+08 \n", + "615 -0.166949 0.016306 8.22896e+07 \n", + "616 0.584131 0.13376 8.96363e+08 \n", + "617 4.18802 0.173939 6.48187e+07 \n", + "618 0.820908 0.0177089 2.48663e+08 \n", + "619 0 0 1.4504e+08 \n", + "620 0.372043 0.0982956 5.73239e+07 \n", + "621 3.57195 0.0725725 9.79746e+07 \n", + "622 0.680361 0.0759516 1.29112e+08 \n", + "\n", + "[623 rows x 7 columns]" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_df=norm_df.copy()\n", + "new_df.reset_index(inplace=True)\n", + "\n", + "temp_list=[]\n", + "\n", + "count=0\n", + "for i in range(len(res_matrix)):\n", + " if res_matrix[i]==-1.0:\n", + " temp_list.append(pd.DataFrame(df.loc[i]).transpose())\n", + " count+=1\n", + "print(count)\n", + "print(count/len(norm_df))\n", + "ineff_nat_df=pd.concat(temp_list)\n", + "ineff_nat_df.reset_index(inplace=True,drop=True)\n", + "ineff_nat_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Results\n", + "\n", + "OneClassSVM determined these 623 National Sized 
def _local_name(tag):
    """Return an ElementTree tag with any leading ``{namespace}`` removed."""
    # rpartition also copes with tags that carry no namespace at all,
    # where ``tag.split('}')[1]`` would raise IndexError.
    return tag.rpartition('}')[2]


def write_csv(filename, lst, csv_writer=None):
    """Parse one Form 990 XML file and write its financial ratios as a CSV row.

    The row written is
    ``[filename, ein, prog_ratio, lia_asset_ratio, work_cap_ratio,
    surplus_margin, total_amt]`` where

    * ``prog_ratio``      = program service expenses / total functional expenses
    * ``lia_asset_ratio`` = end-of-year liabilities / end-of-year total assets
    * ``work_cap_ratio``  = (unrestricted + temporarily restricted net assets)
      / total functional expenses
    * ``surplus_margin``  = (end-of-year - beginning-of-year net assets)
      / current-year total revenue

    A ratio is written as ``0`` when its denominator is zero
    (``work_cap_ratio`` and ``lia_asset_ratio`` are both ``0`` when either
    total assets or total expenses is zero).

    Args:
        filename: Path of the XML file to parse. The first child of the root
            is read as the return header, the second as the return data.
        lst: Four-element list of counters, updated in place:
            ``lst[0]`` filings whose first return-data element is ``IRS990``,
            ``lst[1]`` filings skipped because it is not,
            ``lst[2]`` beginning-of-year net-asset values read,
            ``lst[3]`` filings skipped because of a "does not follow
            SFAS 117" element.
        csv_writer: Object with a ``writerow`` method. When omitted, the
            module-level ``writer`` is used, as before.

    Returns:
        None. Skipped filings return early without writing a row.

    Raises:
        ValueError: A value needed for the row is absent from the filing
            (previously this surfaced as an ``UnboundLocalError``), or an
            amount is not an integer literal.
        IndexError: The root element has fewer than two children.
    """
    root = ET.parse(filename).getroot()
    header, return_data = root[0], root[1]

    # The EIN of the last <Filer> that has one wins. Keep the node rather
    # than its text so a missing element is distinguishable from an empty one.
    ein_node = None
    for child in header:
        if _local_name(child.tag) != "Filer":
            continue
        for grandchild in child:
            if _local_name(grandchild.tag) == "EIN":
                ein_node = grandchild
                break

    # Only the first return-data element decides the form type; anything
    # other than a full IRS990 (e.g. 990-EZ, 990-PF) is skipped.
    first_form = next(iter(return_data), None)
    if first_form is not None:
        if _local_name(first_form.tag) != "IRS990":
            lst[1] += 1
            return None
        lst[0] += 1

    # Optional amounts default to 0; required ones stay None until seen.
    prog_exp = 0
    unres_net_asset = 0
    temp_net_asset = 0
    total_amt = None
    total_rev = None
    end_total_asset = None
    total_liability = None
    beg_net_assets = None
    end_net_assets = None

    for child in return_data:
        for grandchild in child:
            name = _local_name(grandchild.tag)

            if name in ("DoNotFollowSFAS117", "OrgDoesNotFollowSFAS117Ind"):
                lst[3] += 1
                return None

            if name in ("TotalFunctionalExpensesGrp", "TotalFunctionalExpenses"):
                for item in grandchild:
                    item_name = _local_name(item.tag)
                    if item_name in ("TotalAmt", "Total"):
                        total_amt = int(item.text)
                    elif item_name == "ProgramServicesAmt":
                        prog_exp = int(item.text)
            elif name in ("TotalAssetsGrp", "TotalAssets"):
                for item in grandchild:
                    if _local_name(item.tag) in ("EOYAmt", "EOY"):
                        end_total_asset = int(item.text)
            elif name in ("TotalLiabilitiesGrp", "TotalLiabilities"):
                for item in grandchild:
                    if _local_name(item.tag) in ("EOYAmt", "EOY"):
                        total_liability = int(item.text)
            elif name == "UnrestrictedNetAssetsGrp":
                for item in grandchild:
                    if _local_name(item.tag) == "EOYAmt":
                        unres_net_asset = int(item.text)
            elif name == "TemporarilyRstrNetAssetsGrp":
                for item in grandchild:
                    if _local_name(item.tag) == "EOYAmt":
                        temp_net_asset = int(item.text)
            elif name in ("CYTotalRevenueAmt", "TotalRevenueCurrentYear"):
                total_rev = int(grandchild.text)
            elif name in ("TotalNetAssetsFundBalanceGrp",
                          "TotalNetAssetsFundBalances"):
                for item in grandchild:
                    item_name = _local_name(item.tag)
                    if item_name in ("BOYAmt", "BOY"):
                        lst[2] += 1
                        beg_net_assets = int(item.text)
                    elif item_name in ("EOYAmt", "EOY"):
                        end_net_assets = int(item.text)

    # Fail with a readable message instead of an UnboundLocalError. A value
    # is only required when the arithmetic below actually uses it, so every
    # filing that used to produce a row still produces the same row.
    required = [
        ("EIN", ein_node),
        ("total functional expenses", total_amt),
        ("current-year total revenue", total_rev),
        ("end-of-year total assets", end_total_asset),
    ]
    if total_rev:
        required.append(("beginning-of-year net assets", beg_net_assets))
        required.append(("end-of-year net assets", end_net_assets))
    if end_total_asset and total_amt:
        required.append(("end-of-year total liabilities", total_liability))
    missing = [label for label, value in required if value is None]
    if missing:
        raise ValueError("{}: missing required field(s): {}".format(
            filename, ", ".join(missing)))

    if total_amt != 0 and prog_exp != 0:
        prog_ratio = prog_exp / total_amt
    else:
        prog_ratio = 0

    if total_rev != 0:
        surplus_margin = (end_net_assets - beg_net_assets) / total_rev
    else:
        surplus_margin = 0

    if end_total_asset == 0 or total_amt == 0:
        work_cap_ratio = 0
        lia_asset_ratio = 0
    else:
        work_cap_ratio = (unres_net_asset + temp_net_asset) / total_amt
        lia_asset_ratio = total_liability / end_total_asset

    # Fall back to the script's module-level csv writer when none is passed.
    out = writer if csv_writer is None else csv_writer
    out.writerow([filename, ein_node.text, prog_ratio, lia_asset_ratio,
                  work_cap_ratio, surplus_margin, total_amt])
    return None
error_file.write(error_string) + #continue + + os.remove(filename) + + if i % 500 == 0: + print(" " + str(i) + " records processed, now sleep for 5 seconds") + time.sleep(5) + #print(lst) + +