Commit 46a2fd46 authored by Mohammad Imran Syed's avatar Mohammad Imran Syed
Browse files

add new scripts

parent a11d4d21
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@Author : Mohammad Imran SYED (mohammad-imran.syed@lip6.fr)
This script creates the trace completeness plot for super-sniffers of size up to 10.
It uses the num_packets.csv file created by the script initial_analysis.py, so you can run this script
to modify or re-create the figures instead of running initial_analysis.py again.
Run this script in the Analysis/ directory, where the num_packets.csv file is saved.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Figure styling: legend title size, LaTeX text rendering and serif fonts.
plt.rcParams['legend.title_fontsize'] = 'large'
plt.rcParams['text.usetex'] = True
plt.rcParams['font.family'] = 'DeJavu Serif'
plt.rcParams['font.serif'] = ['Times New Roman']

# Tab-separated input table; its 'Trace' and 'Num_packets' columns are used.
# The former ``squeeze=True`` argument was dropped: it only has an effect on
# single-column files and is rejected by pandas >= 2.0.
f = "num_packets.csv"
df = pd.read_csv(f, delimiter='\t')
# Packet count and trace name of every row, in file order.
packets = df['Num_packets'].tolist()
traces = df['Trace'].tolist()

# Row offsets that separate the super-sniffer sizes 1..9 inside the file.
# The widths (10, 45, 120, 210, 252, ...) look like the number of k-subsets
# of 10 sniffers -- presumably one row per combination; confirm against
# initial_analysis.py.
_group_bounds = [0, 10, 55, 175, 385, 637, 847, 967, 1012, 1022]
num_packets = [
    packets[lower:upper]
    for lower, upper in zip(_group_bounds, _group_bounds[1:])
]
# Size 10 is a single trace and is stored as a bare number, not a list.
num_packets.append(packets[-1])

# One x position per size, with the smallest and largest count per size.
x = list(range(1, len(num_packets) + 1))
Y_MIN = [min(group) for group in num_packets[:-1]] + [packets[-1]]
Y_MAX = [max(group) for group in num_packets[:-1]] + [packets[-1]]

# Normalise each curve by its own maximum.
y_min = [count / max(Y_MIN) for count in Y_MIN]
y_max = [count / max(Y_MAX) for count in Y_MAX]
# Largest packet count over all sizes; every completeness value is a
# fraction of it.
_MAX_PACKETS = max(Y_MAX)


def _intermediate_points(size_index):
    """Return ``(xs, ys)`` for the non-extreme traces of one size.

    The counts of ``num_packets[size_index]`` are ordered, the smallest and
    the largest are dropped (they are drawn by the min/max curves) and the
    rest is normalised by ``_MAX_PACKETS``.  ``xs`` repeats the x position of
    that size once per remaining value.

    A sorted *copy* is used so that ``num_packets`` keeps the row order of
    the input file; that order is needed later to pair values with trace
    names.
    """
    inner_counts = sorted(num_packets[size_index])[1:-1]
    ys = [count / _MAX_PACKETS for count in inner_counts]
    return [x[size_index]] * len(ys), ys


x1v1, NUM_1v1 = _intermediate_points(0)
x2v2, NUM_2v2 = _intermediate_points(1)
x3v3, NUM_3v3 = _intermediate_points(2)
x4v4, NUM_4v4 = _intermediate_points(3)
x5v5, NUM_5v5 = _intermediate_points(4)
x6v6, NUM_6v6 = _intermediate_points(5)
x7v7, NUM_7v7 = _intermediate_points(6)
x8v8, NUM_8v8 = _intermediate_points(7)
x9v9, NUM_9v9 = _intermediate_points(8)
# Draw the min/max completeness curves and the intermediate points.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(x, y_min, label=r'$\rm C_{\rm min}^{\rm m}$', marker='o', ms=4)
ax.plot(x, y_max, label=r'$\rm C_{\rm max}^{\rm m}$', marker='x')

# All intermediate series share one look; only the first carries a legend
# entry so the legend shows it once.
_scatter_style = dict(marker='+', c='grey', s=15)
ax.scatter(x1v1, NUM_1v1, label='Intermediate Completeness', **_scatter_style)
_remaining_series = [
    (x2v2, NUM_2v2),
    (x3v3, NUM_3v3),
    (x4v4, NUM_4v4),
    (x5v5, NUM_5v5),
    (x6v6, NUM_6v6),
    (x7v7, NUM_7v7),
    (x8v8, NUM_8v8),
    (x9v9, NUM_9v9),
]
for _xs, _ys in _remaining_series:
    ax.scatter(_xs, _ys, **_scatter_style)

# Frameless axes.
for _side in ('right', 'top', 'left', 'bottom'):
    ax.spines[_side].set_visible(False)

ax.tick_params(axis='both', labelsize=20)
plt.xticks(np.arange(0, 11, 1))
plt.yticks(np.arange(0, 1.1, 0.1))
plt.xlabel("Size of super-sniffer", fontsize=20)
plt.ylabel("Completeness", fontsize=20)
plt.legend(loc='best', frameon=False)
plt.tight_layout()

figure = "Completeness.pdf"
plt.savefig(figure, format='pdf')
# Keep the size-10 entry list-shaped like the other groups.
num_packets[9] = [packets[-1]]

# Completeness (integer percent of the largest count) for every trace.
# It is derived from ``packets`` -- the counts in file order -- instead of the
# per-size groups: those groups are sorted earlier in this script, so
# flattening them would attach values to the wrong trace names.
_largest = max(Y_MAX)
comp = [int(count / _largest * 100) for count in packets]

d = {'Trace': traces, 'Completeness': comp}
DF = pd.DataFrame(d)
DF.to_csv('Completeness.csv', sep='\t', index=False, header=True)
print("Figures created")
print("ANALYSIS DONE")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@Author : Mohammad Imran SYED (mohammad-imran.syed@lip6.fr)
This script also creates trace completeness plot for super-sniffers of size up to 10,
but it adds percentage of improvement brought by super-sniffer of each size to the plot.
It uses the num_packets.csv file created by the script initial_analysis.py
Run this script in the Analysis/ directory where num_packets.csv file is saved.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Figure styling: legend title size, LaTeX text rendering and serif fonts.
plt.rcParams['legend.title_fontsize'] = 'large'
plt.rcParams['text.usetex'] = True
plt.rcParams['font.family'] = 'DeJavu Serif'
plt.rcParams['font.serif'] = ['Times New Roman']

# Tab-separated input table; its 'Trace' and 'Num_packets' columns are used.
# The former ``squeeze=True`` argument was dropped: it only has an effect on
# single-column files and is rejected by pandas >= 2.0.
f = "num_packets.csv"
df = pd.read_csv(f, delimiter='\t')
# Packet count and trace name of every row, in file order.
packets = df['Num_packets'].tolist()
traces = df['Trace'].tolist()

# Row offsets that separate the super-sniffer sizes 1..9 inside the file.
_group_bounds = [0, 10, 55, 175, 385, 637, 847, 967, 1012, 1022]
num_packets = [
    packets[lower:upper]
    for lower, upper in zip(_group_bounds, _group_bounds[1:])
]
# Size 10 is a single trace and is stored as a bare number, not a list.
num_packets.append(packets[-1])

# One x position per size, with the smallest and largest count per size.
x = list(range(1, len(num_packets) + 1))
Y_MIN = [min(group) for group in num_packets[:-1]] + [packets[-1]]
Y_MAX = [max(group) for group in num_packets[:-1]] + [packets[-1]]

# Normalise each curve by its own maximum.
y_min = [count / max(Y_MIN) for count in Y_MIN]
y_max = [count / max(Y_MAX) for count in Y_MAX]

# Gain of the best case from one size to the next, in percentage points
# rounded to one decimal.
delta = np.round(np.diff(y_max) * 100, 1)
def _inner_points(size_index):
    """Return ``(xs, ys)`` for the non-extreme traces of one size.

    Sorts ``num_packets[size_index]`` in place, drops its first and last
    element and divides the rest by ``max(Y_MAX)``.  ``xs`` repeats the x
    position of that size once per remaining value.
    """
    group = num_packets[size_index]
    group.sort()
    largest = max(Y_MAX)
    ys = [count / largest for count in group[1:len(group) - 1]]
    return [x[size_index]] * len(ys), ys


x1v1, NUM_1v1 = _inner_points(0)
x2v2, NUM_2v2 = _inner_points(1)
x3v3, NUM_3v3 = _inner_points(2)
x4v4, NUM_4v4 = _inner_points(3)
x5v5, NUM_5v5 = _inner_points(4)
x6v6, NUM_6v6 = _inner_points(5)
x7v7, NUM_7v7 = _inner_points(6)
x8v8, NUM_8v8 = _inner_points(7)
x9v9, NUM_9v9 = _inner_points(8)
# Draw the max/min completeness curves and the intermediate points.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(x, y_max, label=r'$\rm C_{\rm max}^{\rm m}$', marker='x', ms=4, color='tab:orange')
ax.plot(x, y_min, label=r'$\rm C_{\rm min}^{\rm m}$', marker='o', ms=4, color='tab:blue')

# All intermediate series share one look; only the first carries a legend
# entry so the legend shows it once.
_scatter_style = dict(marker='+', c='grey', s=14)
ax.scatter(x1v1, NUM_1v1, label=r'$\rm C_{\rm .}^{\rm m}$', **_scatter_style)
for _xs, _ys in [
    (x2v2, NUM_2v2),
    (x3v3, NUM_3v3),
    (x4v4, NUM_4v4),
    (x5v5, NUM_5v5),
    (x6v6, NUM_6v6),
    (x7v7, NUM_7v7),
    (x8v8, NUM_8v8),
    (x9v9, NUM_9v9),
]:
    ax.scatter(_xs, _ys, **_scatter_style)

# Frameless axes.
for _side in ('right', 'top', 'left', 'bottom'):
    ax.spines[_side].set_visible(False)

ax.tick_params(axis='both', labelsize=20)
plt.xticks(np.arange(0, 11, 1))
plt.yticks(np.arange(0, 1.1, 0.1))
plt.xlabel("Size of super-sniffer", fontsize=20)
plt.ylabel("Completeness", fontsize=20)
plt.legend(loc='center', prop={'size': 14}, frameon=False)
plt.tight_layout()

# Label every point of the max curve (from size 2 on) with the gain over the
# previous size.  delta[k - 1] is the step from size k to size k + 1, so the
# label belongs to the point at list index k.  Dedicated loop names are used
# so that the list ``x`` is not overwritten, and the '+' format flag yields a
# correct sign even if a step were negative.
for _index, _gain in enumerate(delta, start=1):
    plt.annotate("{:+.1f}".format(_gain),      # label text
                 (x[_index], y_max[_index]),   # point being labelled
                 textcoords="offset points",   # xytext is an offset in points
                 xytext=(-8, 8),               # shift the label up and left
                 ha='center')                  # horizontal alignment

figure = "Completeness_gradient.pdf"
plt.savefig(figure, format='pdf')
print("Figures created")
print("ANALYSIS DONE")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@Author : Mohammad Imran SYED (mohammad-imran.syed@lip6.fr)
Script for extracting relevant information from .pcap files to save it in .txt files
And also anonymize the traces (sender MAC addresses) and save them in AnonymizedTraces/ folder
"""
import time
import hashlib
import subprocess
import os
import pandas as pd
import numpy as np
import re
class PCAP():
    """Export selected 802.11 fields from .pcap files and anonymize sender MACs.

    The instance attribute ``directory`` must be assigned before calling
    ``parsing``.  It is used as a plain string prefix, so it has to end with
    a path separator (e.g. ``'Traces/'``).
    """

    # tshark field -> column name used in the generated text files.  The
    # insertion order is also the column order requested from tshark.
    _FIELD_TO_COLUMN = {
        "frame.number": "Frame_number",
        "frame.time_epoch": "Frame_time_epoch",
        "frame.time_relative": "Frame_time_relative",
        "wlan.fixed.timestamp": "Fixed_timestamp",
        "wlan_radio.signal_dbm": "RSSI_dBm",
        "wlan_radio.channel": "Channel",
        "wlan.fc.type": "Frame_type",
        "wlan.fc.type_subtype": "Frame_subtype",
        "wlan.fc.retry": "Retransmission",
        "wlan.fcs": "Checksum",
        "wlan.sa": "Source_MAC_address",
        "wlan.seq": "Sequence_number",
        "wlan.frag": "Fragment_number",
    }
    # Display filter: drop malformed frames and keep channel 1 only.
    _DISPLAY_FILTER = '!_ws.malformed and wlan_radio.channel==1'
    # Number of rows anonymized per pass over the exported table.
    _CHUNK_ROWS = 1000

    @staticmethod
    def _ensure_directory(path):
        """Create *path* with mode 0o777 if it does not exist yet."""
        if not os.path.exists(path):
            os.makedirs(path)
            os.chmod(path, 0o777)

    @staticmethod
    def _hash_mac(mac):
        """Return the first 64 hex characters of SHA-512 over ``str(mac)``."""
        # NOTE(review): an unsalted hash of a 48-bit address can be reversed
        # by exhaustive search; treat the result as a pseudonym.
        return hashlib.sha512(str(mac).encode()).hexdigest()[:64]

    def parsing(self, file):
        """Export one capture with tshark and write an anonymized copy.

        Two tab-separated files named after the capture (extension replaced
        by ``.txt``) are produced below ``self.directory``:

        * ``CSVTraces/``: the fields of ``_FIELD_TO_COLUMN`` with readable
          column names; missing source addresses and sequence numbers are
          replaced by 0.
        * ``AnonymizedTraces/``: the same table with every
          ``Source_MAC_address`` replaced by its truncated SHA-512 digest.
          An existing file of that name is replaced.

        Args:
            file: Name of the capture file inside ``self.directory``.
        """
        # splitext instead of a fixed 5-character cut: identical for
        # '*.pcap', and still correct for other extensions.
        base_name = os.path.splitext(file)[0]

        csvtraces = self.directory + 'CSVTraces/'
        self._ensure_directory(csvtraces)
        tshark_output = csvtraces + base_name + '.txt'

        tshark_call = ['tshark', '-r', self.directory + file,
                       '-Y', self._DISPLAY_FILTER,
                       '-T', 'fields',
                       '-E', 'header=y', '-E', 'separator=/t']
        for field in self._FIELD_TO_COLUMN:
            tshark_call += ['-e', field]
        # The context manager closes the output even if tshark cannot be
        # started.  The exit status is deliberately not enforced, as before.
        with open(tshark_output, "wb") as tshark_out:
            subprocess.run(tshark_call, stdout=tshark_out)

        # ``squeeze=True`` was removed here: pandas >= 2.0 rejects it and it
        # never applied to this multi-column table.
        trace = pd.read_csv(tshark_output, delimiter='\t')
        trace = trace.rename(columns=self._FIELD_TO_COLUMN)
        trace['Source_MAC_address'] = trace['Source_MAC_address'].fillna(0)
        trace['Sequence_number'] = trace['Sequence_number'].fillna(0)
        trace = trace.astype({"RSSI_dBm": int, "Channel": int, "Sequence_number": int})
        trace.to_csv(tshark_output, index=False, header=True, sep='\t')

        anontraces = self.directory + 'AnonymizedTraces/'
        self._ensure_directory(anontraces)
        anonymized_path = anontraces + base_name + '.txt'
        if os.path.exists(anonymized_path):
            os.remove(anonymized_path)

        # Re-read the table in chunks; each address is hashed once and the
        # digest is reused for later rows and chunks.
        digests = {}
        first_chunk = True
        chunks = pd.read_csv(tshark_output, delimiter='\t',
                             chunksize=self._CHUNK_ROWS)
        for chunk in chunks:
            macs = chunk['Source_MAC_address'].tolist()
            for mac in macs:
                if mac not in digests:
                    digests[mac] = self._hash_mac(mac)
            chunk['Source_MAC_address'] = [digests[mac] for mac in macs]
            # The header is written with the first chunk only; later chunks
            # are appended.
            chunk.to_csv(anonymized_path, index=False, header=first_chunk,
                         mode='w' if first_chunk else 'a', sep='\t')
            first_chunk = False
def main():
    """Anonymize the capture files found in ``Traces/`` and report the runtime."""
    start = time.time()
    anonymize_pcap = PCAP()
    anonymize_pcap.directory = 'Traces/'  # the directory where you have saved the .pcap files
    files = os.listdir(anonymize_pcap.directory)
    files.sort()
    # NOTE(review): positional trimming of the sorted listing -- presumably
    # it skips entries that are not captures; confirm against the directory.
    files = files[2:len(files)-4]
    # Order by the digits contained in each name.  The pattern is a raw
    # string because '\D' is an invalid escape in a normal string literal.
    files.sort(key=lambda f: int(re.sub(r'\D', '', f)))
    for name in files:
        print("Anonymizing file : " + name)
        anonymize_pcap.parsing(name)
        # NOTE(review): printed once per file; the source's indentation was
        # lost, so this placement is an assumption.
        print("Anonymized")
    end = time.time()
    print("ALL FILES HAVE BEEN ANONYMIZED")
    print("------------------------------------------------------------")
    print("Time taken by the program in seconds: ", end-start)
    print("------------------------------------------------------------")


if __name__ == '__main__':
    main()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@Author : Mohammad Imran SYED (mohammad-imran.syed@lip6.fr)
This script is used to create the trace completeness plot for super-sniffers of size up to 10.
It creates all the files needed for the analysis, which can then be used for
creating plots instead of repeating all the calculations of this script again and again.
So you need to run this script first and then use analysis.py to modify the plots
to your liking.
Run this script in the Analysis/ directory, where you have the folders for all merged files.
"""
import csv
import os
import matplotlib.pyplot as plt
import numpy as np
import re
import time
import pandas as pd
# Figure styling: legend title size, LaTeX text rendering and serif fonts.
plt.rcParams.update({
    'legend.title_fontsize': 'large',
    'text.usetex': True,
    'font.family': 'DeJavu Serif',
    'font.serif': ['Times New Roman'],
})
start = time.time()

# Per-size containers: index k belongs to the k-th folder after sorting.
x = list()
Y_MIN = list()
Y_MAX = list()
num_packets = [[] for _ in range(10)]
traces = [[] for _ in range(10)]
packets = [[] for _ in range(10)]  # lists of packets to find the maximum and minimum values for each combination

# Sub-directories of the working directory, ordered by the digits in their
# path.  The pattern is a raw string because '\D' is an invalid escape in a
# normal string literal.
folders = [entry.path for entry in os.scandir() if entry.is_dir()]
folders.sort(key=lambda path: int(re.sub(r'\D', '', path)))

for k, folder in enumerate(folders):
    # NOTE(review): os.listdir returns names in arbitrary order, which is
    # therefore also the row order inside each size group of the output.
    for name in os.listdir(folder):
        path = folder + "/" + name
        if k == 0:
            # First folder: the packet count is the number of CSV records
            # minus the header line.
            with open(path, "r") as handle:
                lines = len(list(csv.reader(handle))) - 1
        else:
            # Other folders: the count is read from the second column of the
            # first data row -- presumably written there by the merge step;
            # confirm.  (``squeeze=True`` was removed: pandas >= 2.0 rejects
            # it.)
            merged = pd.read_csv(path, delimiter='\t')
            lines = merged.values[0][1]
        # Every file contributes one entry.  The previous
        # ``packets[k][l] = packets[k][l] + lines`` indexed a slot that does
        # not exist yet and raised IndexError on the second file of a folder.
        packets[k].append(lines)
        num_packets[k].append(lines)
        traces[k].append(name[0:len(name)-4])

# Smallest and largest packet count per size.
for i, group in enumerate(packets):
    x.append(i + 1)
    Y_MIN.append(min(group))
    Y_MAX.append(max(group))

# Persist one row per trace so the plotting scripts can reuse the counts.
# The file is opened only now, and through a context manager, so a failure
# while scanning does not leave an empty, unclosed output behind.
with open("num_packets.csv", "w") as f1:
    f1.write("Trace\tNum_packets\n")
    for group_names, group_counts in zip(traces, num_packets):
        for trace_name, count in zip(group_names, group_counts):
            f1.write(str(trace_name) + "\t" + str(count) + "\n")
# Normalise each curve by its own maximum.
y_min = [count / max(Y_MIN) for count in Y_MIN]
y_max = [count / max(Y_MAX) for count in Y_MAX]

# Intermediate points for the sizes 1..9: sort each group in place, drop its
# smallest and largest count and scale the rest by the overall maximum.
_largest = max(Y_MAX)
_series = []
for _k in range(9):
    num_packets[_k].sort()
    _ys = [count / _largest for count in num_packets[_k][1:-1]]
    _series.append(([x[_k]] * len(_ys), _ys))

(
    (x1v1, NUM_1v1),
    (x2v2, NUM_2v2),
    (x3v3, NUM_3v3),
    (x4v4, NUM_4v4),
    (x5v5, NUM_5v5),
    (x6v6, NUM_6v6),
    (x7v7, NUM_7v7),
    (x8v8, NUM_8v8),
    (x9v9, NUM_9v9),
) = _series
# Normalised min and max value of the last size, in that order.
a = [Y_MIN[-1] / max(Y_MIN), Y_MAX[-1] / max(Y_MAX)]