# -*- coding: utf-8 -*-
"""
Created on Mon Mar 16 10:59:17 2020

@author: alborsa1
"""
# %% Import Libraries
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn import preprocessing
import sklearn.metrics as metrics


# %% Functions for Feature Engineering

# -----------------------------------------------------------------------------
# Function for getting the label-to-integer mapping from a fitted encoder
def get_integer_mapping(le):
    '''
    Return a dict mapping labels to their integer values
    from a fitted sklearn LabelEncoder.
    INPUT:
        - le : a fitted sklearn LabelEncoder
    '''
    res = {}
    for cl in le.classes_:
        res[cl] = le.transform([cl])[0]

    return res

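# A minimal usage sketch for get_integer_mapping (the label values below are
# made up for illustration; any fitted LabelEncoder works the same way):
#
#     le = preprocessing.LabelEncoder().fit(['Open', 'Closed', 'Open'])
#     get_integer_mapping(le)   # -> {'Closed': 0, 'Open': 1}
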

def Encode_fields(PandasDF, fields):
    '''
    Return the dataframe with the given fields label-encoded, together with
    the label-to-integer mapping of each field.
    INPUT:
        - PandasDF : DataFrame
        - fields   : list of column names to encode
    OUTPUT:
        - PandasDF : DataFrame with the fields replaced by their encoded values
        - Mapping  : list of [field, mapping dict] pairs
    '''
    Mapping = []
    for field in fields:
        print("Encoding:", field)
        # Replace missing values with the placeholder '-99' before encoding
        TempDF = PandasDF.loc[:, field].copy().fillna('-99')
        Encoder = preprocessing.LabelEncoder()
        Fitted_Encoder = Encoder.fit(TempDF)
        Encoded_label = Fitted_Encoder.transform(TempDF)
        Mapping.append([field, get_integer_mapping(Fitted_Encoder)])
        PandasDF.loc[:, field] = Encoded_label

    return PandasDF, Mapping

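# A minimal usage sketch for Encode_fields (the DataFrame and the 'status'
# column below are hypothetical, only to show the expected call and outputs):
#
#     df = pd.DataFrame({'status': ['Open', 'Closed', None]})
#     df, mapping = Encode_fields(df, ['status'])
#     # mapping -> [['status', {'-99': 0, 'Closed': 1, 'Open': 2}]]
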
# -----------------------------------------------------------------------------

# Function for scaling features
def scaleFeaturesDF(data_train):
    '''
    Feature scaling is a transformation that only changes the scale, not the
    number of features. Because of this, we can still use the original
    dataset's column names, as long as we keep in mind that the _units_ have
    been altered.

    Method: preprocessing.StandardScaler()

    INPUT:
        - data_train : Pandas DataFrame used to fit the scaler

    OUTPUT:
        - data_train : scaled DataFrame
        - transf     : fitted scaler, to be reused on the testing dataset
    '''
    columns = data_train.columns

    transf = preprocessing.StandardScaler(with_mean=True).fit(data_train)
    data_train = transf.transform(data_train)
    data_train = pd.DataFrame(data=data_train, columns=columns)

    return data_train, transf

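# A minimal usage sketch for scaleFeaturesDF (X_train and X_test are
# hypothetical feature DataFrames; the fitted scaler returned here should be
# reused to transform the testing features):
#
#     X_train_scaled, scaler = scaleFeaturesDF(X_train)
#     X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)
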

# -----------------------------------------------------------------------------

# %% Functions for Training and Classification

# Split into train and test datasets
def split(split_dataset, X, y, perc_testing):
    '''
    INPUT:
        - split_dataset : True or False
        - X             : features dataset
        - y             : label dataset
        - perc_testing  : fraction of the dataset to assign to the testing phase
    OUTPUT:
        - data_train  : dataset used to train the model
        - data_test   : dataset used to test the model
        - label_train : labels of the training dataset
        - label_test  : labels of the testing dataset
    '''
    if split_dataset:
        data_train, data_test, label_train, label_test = train_test_split(X, y, test_size=perc_testing, random_state=7)
        print("##--**: Dataset split complete")
        print("Testing dataset is", perc_testing * 100, "% of the initial dataset")
    else:
        data_train, data_test, label_train, label_test = X, X, y, y
        print("##--**: Dataset not split (training and testing sets are identical)")

    return data_train, data_test, label_train, label_test

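# A minimal usage sketch for split (X and y are hypothetical feature and label
# objects; 0.3 reserves 30% of the rows for the testing phase):
#
#     data_train, data_test, label_train, label_test = split(True, X, y, 0.3)
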

# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Function for computing classification evaluation statistics
def compute_evaluation_stats(label_test, prediction_test):
    '''
    Plot the confusion matrix and compute precision and recall.
    INPUT:
        - label_test      : true labels of the testing dataset (Pandas Series)
        - prediction_test : labels predicted by the model
    OUTPUT:
        - precision : tp / (tp + fp)
        - recall    : tp / (tp + fn)
    '''
    y_true = label_test.values
    y_pred = prediction_test
    columns = ['Closed', 'Open']

    # Plot the confusion matrix (rows: true labels, columns: predicted labels)
    confusion = metrics.confusion_matrix(y_true, y_pred)
    plt.imshow(confusion, cmap=plt.cm.Blues, interpolation='nearest')
    plt.xticks([0, 1], columns, rotation='vertical')
    plt.yticks([0, 1], columns)
    plt.colorbar()
    plt.show()

    # Reuse the confusion matrix computed above to extract the counts
    tn, fp, fn, tp = confusion.ravel()
    print("True Positive:", tp)
    print("True Negative:", tn)
    print("False Positive:", fp)
    print("False Negative:", fn)

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    print("Precision:", precision)
    print("Recall:", recall)

    return precision, recall

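# A minimal sanity-check sketch for compute_evaluation_stats (the labels below
# are made up; the (tn, fp, fn, tp) unpacking assumes a binary problem):
#
#     y_true = pd.Series([0, 1, 1, 0, 1])
#     y_pred = [0, 1, 0, 0, 1]
#     precision, recall = compute_evaluation_stats(y_true, y_pred)  # 1.0, 0.666..
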

# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Function for K-Neighbors
def kneigh(df_train, df_test, label_train, label_test):
    '''
    Function for training a K-Neighbors classifier.
    INPUT:
        - df_train    : features dataset for training the model
        - df_test     : features dataset for testing the model
        - label_train : labels of the training dataset
        - label_test  : labels of the testing dataset

    OUTPUT:
        - knmodel       : trained model
        - knmodel_stats : statistics about the model
    '''
    # Set model parameters
    print("##--**: Computing K-Neighbors classifier..")
    neighbors = 5
    print("##--**: N-Neighbors:", neighbors)

    # Define model
    knmodel = KNeighborsClassifier(n_neighbors=neighbors, weights='uniform')

    # Train model
    print("##--**.a: Train KNeighborsClassifier model..")
    knmodel = knmodel.fit(df_train, label_train)

    # Calculate and display the accuracy on the training dataset
    accuracy_training_knmodel = knmodel.score(df_train, label_train)
    print("Scoring model (accuracy), on training dataset:", accuracy_training_knmodel)

    # Compute predictions on the testing dataset
    prediction_test = knmodel.predict(df_test)

    # Calculate and display the accuracy on the testing dataset
    accuracy_testing_knmodel = knmodel.score(df_test, label_test)
    print("Scoring model (accuracy), on testing dataset:", accuracy_testing_knmodel)

    # Calculate evaluation statistics
    precision_knmodel, recall_knmodel = compute_evaluation_stats(label_test, prediction_test)

    knmodel_stats = ['KNeighborsClassifier', accuracy_training_knmodel, accuracy_testing_knmodel, precision_knmodel,
                     recall_knmodel]

    return knmodel, knmodel_stats

# -----------------------------------------------------------------------------
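
# -----------------------------------------------------------------------------
# A minimal end-to-end sketch of how these functions fit together. The file
# name, column names and target field below are hypothetical placeholders,
# not part of the original pipeline:
#
#     raw = pd.read_csv('data.csv')                              # hypothetical input
#     raw, mapping = Encode_fields(raw, ['status', 'category'])  # encode categoricals
#     X = raw.drop(columns=['target'])
#     y = raw['target']
#     data_train, data_test, label_train, label_test = split(True, X, y, 0.3)
#     data_train, scaler = scaleFeaturesDF(data_train)
#     data_test = pd.DataFrame(scaler.transform(data_test), columns=data_test.columns)
#     knmodel, knmodel_stats = kneigh(data_train, data_test, label_train, label_test)
# -----------------------------------------------------------------------------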