
Commit 3e05814

Preparation for evening session 5
1 parent 5a85415 commit 3e05814

File tree

118 files changed: +3629 additions, 0 deletions


Flask/Flask05/burlesco70/utility/AzureMaps.ipynb

Lines changed: 882 additions & 0 deletions
Large diffs are not rendered by default.

Flask/Flask05/burlesco70/utility/__init__.py

Whitespace-only changes.
Lines changed: 112 additions & 0 deletions
@@ -0,0 +1,112 @@
import aiohttp
import pandas as pd
from tqdm import tqdm
from utility import get_folder_path
from yaml_utility import read_yaml_file

config_base_path = get_folder_path('./')
configurations = read_yaml_file(config_base_path, 'azuremaps_config.yml')

client_id = configurations['client_id']
subscription_id = configurations['subscription_id']
subscription_key = configurations['subscription_key']
language = configurations['language']
country = configurations['country']


async def azure_map_geo_search(df, key, columns, config_path='', config_filename='', error_remove=True):
    """
    Search the latitude and longitude of a place using a query built from dataframe columns.
    This service uses Azure Maps, so you will need a subscription key for the service.
    Put your subscription keys in a YAML file; see the project documentation
    for the expected YAML layout.

    Building the query requires the dataframe and a specific column used as the key.
    You also need the list of columns to search on (e.g. Comune, Provincia, Nazione, ...).

    Optionally, rows whose API call fails can be removed from the dataframe.
    This is useful to keep only rows with complete features.
    """
    # Get service configurations from the YAML file
    if config_path == '' or config_path is None:
        config_path = get_folder_path('./')

    if config_filename == '' or config_filename is None:
        config_filename = '../../../LAVORO/DataLab/Rework/DatalabV1_Rework/Code/utility/azuremaps_config.yml'

    print(f"Config filename: {config_filename}, config path: {config_path}")
    configurations = read_yaml_file(config_path, config_filename)

    subscription_key = configurations['subscription_key']
    language = configurations['language']
    country = configurations['country']

    # Define the default service URI
    # Documentation: https://docs.microsoft.com/en-us/rest/api/maps/search/getsearchaddress
    service_uri = "https://atlas.microsoft.com/search/address/json?subscription-key={}&api-version=1.0&query={}&countrySet={}&language={}"

    # Result lists
    latitude = []
    longitude = []
    errors = []

    # Open the session for the API calls
    session = aiohttp.ClientSession()

    for i, el in enumerate(tqdm(df[key].tolist())):

        # Build the search query from the requested columns
        query = ', '.join(str(df.loc[i, col]) for col in columns)

        request = service_uri.format(subscription_key, query, country, language)

        try:
            async with session.get(request) as resp:
                response = await resp.json()

            # response['results'][0]['address']['countrySecondarySubdivision']
            latitude.append(response['results'][0]['position']['lat'])
            longitude.append(response['results'][0]['position']['lon'])

        except Exception as message:
            print(f"Impossible to get information for element {i} about: {query} because: {message}")
            errors.append([i, el])
            continue

    # Close the session
    await session.close()
    print(f"Download completed with {len(errors)} errors. Please check the errors list for details")

    # Remove error rows (optional), so the remaining rows line up with the
    # coordinates collected above
    if error_remove:
        print("Removing errors from the original dataframe")
        for idx, codice in errors:
            index_names = df[df[key] == codice].index

            # Delete these row indexes from the dataframe
            df.drop(index_names, inplace=True)

            print(f"Removed error {codice} in position: {index_names}")

    # Create the result columns in the dataframe
    df['Latitude'] = latitude
    df['Longitude'] = longitude

    print(f"Result Dataframe Shape: {df.shape}")

    return df
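
A minimal usage sketch for the function above (not part of the commit; the module import, dataframe, key, and config locations are illustrative assumptions):

# Hypothetical usage: geocode two cities using a local azuremaps_config.yml.
import asyncio
import pandas as pd

cities = pd.DataFrame({
    'Codice': ['A001', 'A002'],
    'Comune': ['Milano', 'Roma'],
    'Provincia': ['MI', 'RM'],
})

result = asyncio.run(azure_map_geo_search(
    cities, key='Codice', columns=['Comune', 'Provincia'],
    config_path='.', config_filename='azuremaps_config.yml'))
print(result[['Codice', 'Latitude', 'Longitude']])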
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
client_id: "7bd04840-71e5-4756-8034-80a300be8d5e"
subscription_id: "8b4109a9-6106-434c-9bc6-cf6324357454"
subscription_key: "PiEFZPiPl8ibQudVmL6e-wcAI7-Y92UkvvK57uCHcJc"
language: "it-IT"
country: "it"
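
The read_yaml_file helper imported from yaml_utility above is not part of this diff; a minimal sketch of what it plausibly does, assuming PyYAML (the body here is an assumption, not the repository's code):

# Hypothetical sketch of yaml_utility.read_yaml_file (not in this commit).
import os
import yaml

def read_yaml_file(path, filename):
    # Join folder and filename, then parse the YAML document into a dict
    with open(os.path.join(path, filename), 'r') as f:
        return yaml.safe_load(f)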
Lines changed: 191 additions & 0 deletions
@@ -0,0 +1,191 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 16 10:59:17 2020

@author: alborsa1
"""
# %% Import Libraries
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn import preprocessing
import sklearn.metrics as metrics


# %% Functions for Feature Engineering

# -----------------------------------------------------------------------------
# Function for getting the mapping from the encoding function
def get_integer_mapping(le):
    '''
    Return a dict mapping labels to their integer values
    from an SKlearn LabelEncoder
    le = a fitted SKlearn LabelEncoder
    '''
    res = {}
    for cl in le.classes_:
        res.update({cl: le.transform([cl])[0]})

    return res


def Encode_fields(PandasDF, fields):
    '''
    Return the dataframe with the fields encoded and the mapping info
    INPUT:
    - PandasDF : Dataframe
    - fields : List of fields
    '''
    Mapping = []
    for field in fields:
        print("Encoding.. :", field)
        TempDF = PandasDF.loc[:, field].copy()
        # Replace missing values with a sentinel before encoding
        TempDF.loc[TempDF.isnull()] = '-99'
        Encoder = preprocessing.LabelEncoder()
        Fitted_Encoder = Encoder.fit(TempDF)
        Encoded_label = Fitted_Encoder.transform(TempDF)
        Mapping.append([field, get_integer_mapping(Encoder)])
        PandasDF.loc[:, field] = Encoded_label

    return PandasDF, Mapping


# -----------------------------------------------------------------------------

# Function for scaling features
def scaleFeaturesDF(data_train):
    ''' Feature scaling is a type of transformation that only changes the
    scale, but not the number, of features. Because of this, we can still
    use the original dataset's column names... as long as we keep in
    mind that the _units_ have been altered.

    Method: preprocessing.StandardScaler()

    INPUT:
    - data_train : Pandas dataframe used to fit the scaler

    OUTPUT:
    - data_train : data_train transformed
    - transf : model used for scaling the variables of the training dataset

    '''
    X = data_train.columns

    transf = preprocessing.StandardScaler(with_mean=True).fit(data_train)
    data_train = transf.transform(data_train)
    data_train = pd.DataFrame(data=data_train, columns=X)

    return data_train, transf


# -----------------------------------------------------------------------------

# %% Functions for Training Classification

# Split into train and test datasets
def split(split_dataset, X, y, perc_testing):
    '''
    Input:
    - split_dataset : True or False
    - X : Features dataset
    - y : Label dataset
    - perc_testing : dataset percentage to assign to the testing phase
    Output:
    - data_train : dataset to train the model
    - data_test : dataset to test the model
    - label_train : labels of the train dataset
    - label_test : labels of the test dataset
    '''
    if split_dataset:
        data_train, data_test, label_train, label_test = train_test_split(X, y, test_size=perc_testing, random_state=7)
        print("##--**: Dataset split complete")
        print("Testing dataset dimension equal to", perc_testing * 100, "% of the initial dataset")
    else:
        data_train, data_test, label_train, label_test = X, X, y, y
        print("##--**: Dataset not split")

    return data_train, data_test, label_train, label_test


# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Function for computing classification evaluation
def compute_evaluation_stats(label_test, prediction_test):
    '''
    Plot the confusion matrix and print precision and recall
    for a binary classification ('Closed' vs 'Open').
    '''
    y_true = label_test.values
    y_pred = prediction_test
    columns = ['Closed', 'Open']
    confusion = metrics.confusion_matrix(y_true, y_pred)
    plt.imshow(confusion, cmap=plt.cm.Blues, interpolation='nearest')
    plt.xticks([0, 1], columns, rotation='vertical')
    plt.yticks([0, 1], columns)
    plt.colorbar()
    plt.show()

    tn, fp, fn, tp = metrics.confusion_matrix(y_true, y_pred).ravel()
    print("True Positive:", tp)
    print("True Negative:", tn)
    print("False Positive:", fp)
    print("False Negative:", fn)

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    print("Precision:", precision)
    print("Recall:", recall)

    return precision, recall


# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Function for K-Neighbors
def kneigh(df_train, df_test, label_train, label_test):
    '''
    Function for training a K-Neighbors classifier
    Input:
    - df_train : Features dataset for training the model
    - df_test : Features dataset for testing the model
    - label_train : Labels of the training dataset
    - label_test : Labels of the testing dataset

    Output:
    - knmodel : Model
    - knmodel_stats : Statistics about the model
    '''
    # Set model parameters
    print("##--**: Computing K-Neighbors classifier..")
    neighbors = 5
    print("##--**: N-Neighbors:", neighbors)

    # Define model
    knmodel = KNeighborsClassifier(n_neighbors=neighbors, weights='uniform')

    # Train model
    print("##--**.a: Train KNeighborsClassifier model..")
    knmodel = knmodel.fit(df_train, label_train)

    # Calculate and display the accuracy on the training set
    accuracy_training_knmodel = knmodel.score(df_train, label_train)
    print("Scoring model (accuracy), on training dataset:", accuracy_training_knmodel)

    # Compute predictions on the testing dataset
    prediction_test = knmodel.predict(df_test)

    # Calculate and display the accuracy on the testing set
    accuracy_testing_knmodel = knmodel.score(df_test, label_test)
    print("Scoring model (accuracy), on testing dataset:", accuracy_testing_knmodel)

    # Calculate evaluation statistics
    precision_knmodel, recall_knmodel = compute_evaluation_stats(label_test, prediction_test)

    knmodel_stats = ['KNeighborsClassifier', accuracy_training_knmodel, accuracy_testing_knmodel, precision_knmodel,
                     recall_knmodel]

    return knmodel, knmodel_stats

# -----------------------------------------------------------------------------
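
A minimal end-to-end sketch of how these helpers compose (not part of the commit; the dataframe and column names are hypothetical):

# Hypothetical pipeline: encode a categorical field, scale, split, train KNN.
toy = pd.DataFrame({
    'Regione': ['Lombardia', 'Lazio', 'Veneto', 'Lazio'] * 25,
    'Valore': range(100),
    'Stato': [0, 1] * 50,  # 0 = Closed, 1 = Open
})

toy, mapping = Encode_fields(toy, ['Regione'])
X, y = toy[['Regione', 'Valore']], toy['Stato']
X_scaled, transf = scaleFeaturesDF(X)
data_train, data_test, label_train, label_test = split(True, X_scaled, y, 0.3)
knmodel, knmodel_stats = kneigh(data_train, data_test, label_train, label_test)

For a real experiment you would fit the scaler on the training split only; the two-argument scale_features in the next file supports exactly that.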
Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
import pandas as pd
from sklearn import preprocessing


# Function for getting the mapping from the encoding function
def get_integer_mapping(le):
    '''
    Return a dict mapping labels to their integer values
    from an SKlearn LabelEncoder
    le = a fitted SKlearn LabelEncoder
    '''
    res = {}
    for cl in le.classes_:
        res.update({cl: le.transform([cl])[0]})

    return res


def encode_fields(PandasDF, fields):
    '''
    Return the dataframe with the fields encoded and the mapping info
    INPUT:
    - PandasDF : Dataframe
    - fields : List of fields
    '''
    Mapping = []
    for field in fields:
        print("Encoding.. :", field)
        TempDF = PandasDF.loc[:, field].copy()
        # Replace missing values with a sentinel before encoding
        TempDF.loc[TempDF.isnull()] = '-99'
        Encoder = preprocessing.LabelEncoder()
        Fitted_Encoder = Encoder.fit(TempDF)
        Encoded_label = Fitted_Encoder.transform(TempDF)
        Mapping.append([field, get_integer_mapping(Encoder)])
        PandasDF.loc[:, field] = Encoded_label

    return PandasDF, Mapping


# Function for scaling features
def scale_features(data_train, data_test=None):
    ''' Feature scaling is a type of transformation that only changes the
    scale, but not the number, of features. Because of this, we can still
    use the original dataset's column names... as long as we keep in
    mind that the _units_ have been altered.

    Method: preprocessing.StandardScaler()

    INPUT:
    - data_train = Pandas dataframe used to fit the scaler
    - data_test = Pandas dataframe to transform with the fitted scaler

    OUTPUT:
    - data_train = data_train transformed
    - data_test = data_test transformed
    - transf = fitted scaler

    '''

    X = data_train.columns
    transf = preprocessing.StandardScaler(with_mean=True).fit(data_train)
    data_train = transf.transform(data_train)
    data_train = pd.DataFrame(data=data_train, columns=X)

    if data_test is not None:
        Y = data_test.columns
        data_test = transf.transform(data_test)
        data_test = pd.DataFrame(data=data_test, columns=Y)
    else:
        data_test = pd.DataFrame()

    return data_train, data_test, transf
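
A minimal usage sketch for scale_features (hypothetical frames, not part of the commit), fitting the scaler on the training data only and reusing it for the test data:

# Hypothetical usage: standardize train data, apply the same scaler to test data.
df_tr = pd.DataFrame({'a': [1.0, 2.0, 3.0], 'b': [10.0, 20.0, 30.0]})
df_te = pd.DataFrame({'a': [1.5, 2.5], 'b': [15.0, 25.0]})

tr_scaled, te_scaled, scaler = scale_features(df_tr, df_te)
print(tr_scaled.mean())  # approximately 0 per column after standardization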
