加载中 data_loader.py已删除 100644 → 0 +0 −95 原始行号 差异行号 差异行 from torch.utils.data import Dataset import numpy as np import torch import os import pickle class AliyunDataLoaderForFed(Dataset): def __init__(self,mode='train',semi=False,rank=1,world_size=3,num_keys=219) -> None: super().__init__() self.num_keys = num_keys x=np.load("../Fedlog/data/data_{}.npy".format(rank-1)) if semi: y=np.load("../Fedlog/data/semi_label_{}.npy".format(rank-1)) else: y=np.load("../Fedlog/data/label_{}.npy".format(rank-1)) y[y!=0] = 1 _len = len(y) if mode == 'train': self.x = x[:int(_len*0.8)] self.y = y[:int(_len*0.8)] else: self.x = x[int(_len*0.8):] self.y = y[int(_len*0.8):] with open("EventId2WordVecter.pickle",'br') as f: self.EventId2WordVecter = pickle.load(f) def __len__(self): return len(self.y) def __getitem__(self, index): eventId_list = self.x[index] x=[] for eventId in eventId_list: x.append(self.EventId2WordVecter[eventId]) return np.array(x).astype(np.float32),self.y[index] class AliyunDataLoader(Dataset): def __init__(self,mode='train') -> None: super().__init__() x=np.load("data/data.npy") y=np.load("data/label.npy") with open("EventId2WordVecter.pickle",'br') as f: self.EventId2WordVecter = pickle.load(f) y[y!=0] = 1 _len = len(y) if mode == 'train': self.x = x[:int(_len*0.8)] self.y = y[:int(_len*0.8)] else: self.x = x[int(_len*0.8):] self.y = y[int(_len*0.8):] def __len__(self): return len(self.y) def __getitem__(self, index): eventId_list = self.x[index] x=[] for eventId in eventId_list: x.append(self.EventId2WordVecter[eventId]) return np.array(x).astype(np.float32),self.y[index] class CMCCDataLoaderForFed(Dataset): def __init__(self,mode='train',semi=False,rank=1,world_size=3,num_keys=144) -> None: super().__init__() self.num_keys = num_keys data_path = "/home/zhangshenglin/chezeyu/log/cmcc_0929/data" x=np.load("{}/eventIndex_{}.npy".format(data_path,rank-1)) if semi: y=np.load("{}/semi_label_{}.npy".format(data_path,rank-1)) else: 
y=np.load("{}/label_{}.npy".format(data_path,rank-1)) y[y!=0] = 1 _len = len(y) if mode == 'train': self.x = x[:int(_len*0.8)] self.y = y[:int(_len*0.8)] else: self.x = x[int(_len*0.8):] self.y = y[int(_len*0.8):] with open("EventId2WordVecter_cmcc.pickle",'br') as f: self.EventId2WordVecter = pickle.load(f) def __len__(self): return len(self.y) def __getitem__(self, index): eventId_list = self.x[index] x=[] for eventId in eventId_list: x.append(self.EventId2WordVecter[eventId]) return np.array(x).astype(np.float32),self.y[index] No newline at end of file fed_main.py已删除 100644 → 0 +0 −211 原始行号 差异行号 差异行 from data_loader import AliyunDataLoaderForFed,CMCCDataLoaderForFed from model import robustlog import torch import torch.optim as optim import torch.nn as nn from torch.utils.data import DataLoader import os import copy import numpy as np from sklearn.metrics import classification_report, accuracy_score import pathlib import argparse parser = argparse.ArgumentParser() parser.add_argument('--seed', type=int, default=2020) parser.add_argument('--classes', type=int, default=2) parser.add_argument('--batch_size', type=int, default=64) parser.add_argument('--learning_rate', type=float, default=0.001) parser.add_argument('--global_epochs', type=int, default=10) parser.add_argument('--local_epochs', type=int, default=10) parser.add_argument('--gpu_id', type=int, default=0) parser.add_argument('--embed_dim', type=int, default=768) parser.add_argument('--world_size', type=int, default=6) parser.add_argument('--out_dir', type=str, default='0917') parser.add_argument('--datatype', type=str, default='aliyun') parser.add_argument('--is_train', type=bool, default=True) # parser.add_argument('--is_semi', type=int, default=0) args = parser.parse_args() seed = args.seed classes = args.classes batch_size = args.batch_size learning_rate = args.learning_rate global_epochs = args.global_epochs local_epochs = args.local_epochs device = torch.device('cuda:{}'.format(args.gpu_id)) 
embed_dim = args.embed_dim world_size = args.world_size out_path = args.out_dir datatype = args.datatype is_train = args.is_train # is_semi = True if args.is_semi==1 else False is_semi = False out_dir = f"output/fedlog_{datatype}_logs_{out_path}_{global_epochs}e_{local_epochs}locEpoch_{learning_rate}lr_{batch_size}bs_{world_size}ws_{is_semi}semis" save_dir = os.path.join(out_dir, "model_save") def torch_seed(seed): import random random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False def load_datasets_aliyun(rank,world_size): train_db = AliyunDataLoaderForFed(mode='train',semi=False,rank=rank,world_size=world_size) test_db = AliyunDataLoaderForFed(mode='test',semi=False,rank=rank,world_size=world_size) print(rank,len(train_db),len(test_db)) train_loader = DataLoader(train_db,batch_size=batch_size,shuffle=True,drop_last=True) test_loader = DataLoader(test_db,batch_size=batch_size) val_loader = test_loader return train_loader,val_loader,test_loader def load_datasets_cmcc(rank,world_size): train_db = CMCCDataLoaderForFed(mode='train',semi=False,rank=rank,world_size=world_size) test_db = CMCCDataLoaderForFed(mode='test',semi=False,rank=rank,world_size=world_size) train_loader = DataLoader(train_db,batch_size=batch_size,shuffle=True,drop_last=True) test_loader = DataLoader(test_db,batch_size=batch_size) val_loader = test_loader return train_loader,val_loader,test_loader # Federated averaging: FedAvg def FedAvg(w): w_avg = copy.deepcopy(w[0]) for k in w_avg.keys(): for i in range(1, len(w)): w_avg[k] += w[i][k] w_avg[k] = torch.div(w_avg[k], len(w)) return w_avg def cal_f1(label_list, pred_list,fw=None): label_arr = np.array(label_list) pred_arr = np.array(pred_list) # 异常检测 ad_label = np.where(label_arr>0,1,0) ad_pred = np.where(pred_arr>0,1,0) print("异常检测结果:") print(classification_report(ad_label,ad_pred)) if fw: fw.write("异常检测结果:\n") 
fw.write(classification_report(ad_label,ad_pred)) fw.write('\n\n') if __name__=='__main__': torch_seed(seed) print(out_dir,'\n',save_dir) if datatype == 'aliyun': loader_list = [load_datasets_aliyun(i,world_size) for i in range(1,world_size)] else: loader_list = [load_datasets_cmcc(i,world_size) for i in range(1,world_size)] client_list = [robustlog(300,10,2,device=device).to(device) for i in range(1,world_size)] # optimizer_client_list = [optim.Adam(client_list[i].parameters(), lr=learning_rate) for i in range(world_size-1)] # optimizer_server_list = [optim.Adam(server_list[i].parameters(), lr = learning_rate) for i in range(world_size-1)] best_score_list = [0 for _ in range(world_size-1)] if not os.path.exists(out_dir): os.mkdir(out_dir) if not os.path.exists(save_dir): os.mkdir(save_dir) fw_list = [open("./{}/log{}.txt".format(out_dir, i+1),'w') for i in range(0,world_size-1)] criteon = nn.CrossEntropyLoss().to(device) # save model parameter for i,model in enumerate(client_list): torch.save(model.state_dict(),os.path.join(save_dir,f"model_param{i}.pkl")) for it in range(global_epochs): print('*'*5,it,'*'*5) #train for i in range(world_size-1): client_model = client_list[i] optimizer_client = optim.SGD(client_list[i].parameters(), lr=learning_rate) client_model.train() train_loader = loader_list[i][0] for e in range(local_epochs): print('server',i ,'train epoch:', e) for batch_idx, (data, target) in enumerate(train_loader): data, target = data.to(device), target.to(device) target[target!=0] = 1 # print(data.dtype,inputs.dtype) # logits = model(data) logits = client_model(data) loss = criteon(logits, target) optimizer_client.zero_grad() loss.backward() optimizer_client.step() client_model = FedAvg(list(map(lambda x:x.state_dict(),client_list))) for i in range(world_size-1): client_list[i].load_state_dict(client_model) for i in range(world_size-1): client_model = client_list[i] client_model.eval() val_loader = loader_list[i][1] #valid test_loss = 0 y_true = [] 
y_pred = [] for data, target in val_loader: target[target!=0] = 1 y_true.extend(target) data, target = data.to(device), target.to(device) logits = client_model(data) # logits = model(data) test_loss += criteon(logits, target).item() pred = logits.data.topk(1)[1].flatten().cpu() y_pred.extend(pred) test_loss /= len(val_loader.dataset) # F1_Score = f1_score(y_true, y_pred) acc = accuracy_score(y_true, y_pred) print('\n{},VALID set: Average loss: {:.4f},score:{}\n'.format( i,test_loss,round(acc.item(),4))) fw_list[i].write('\n{},VALID set: Average loss: {:.4f},score:{}\n'.format( it,test_loss,round(acc.item(),4))) if acc>best_score_list[i]: best_score_list[i] = acc torch.save(client_model.state_dict(),os.path.join(save_dir,f"model_param{i}.pkl")) for i in range(world_size-1): print("local server",i) client_model.load_state_dict(torch.load(os.path.join(save_dir,f"model_param{i}.pkl"))) client_model.eval() test_loader = loader_list[i][2] pred_list = [] label_list = [] for data, target in test_loader: target[target!=0] = 1 data, target = data.to(device), target.to(device) logits = client_model(data) pred = logits.data.topk(1)[1].flatten() pred_list.extend(list(pred.cpu())) label_list.extend(list(target.cpu())) cal_f1(label_list, pred_list,fw_list[i]) for f in fw_list: f.close() No newline at end of file model.py已删除 100644 → 0 +0 −60 原始行号 差异行号 差异行 import torch import torch.nn as nn from torch.autograd import Variable class robustlog(nn.Module): def __init__(self, input_size, hidden_size, num_layers, num_keys=2,device='cpu'): super(robustlog, self).__init__() self.device = device self.hidden_size = hidden_size self.num_layers = num_layers self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True, dropout=0.5) self.num_directions = 2 self.fc1 = nn.Linear(hidden_size * self.num_directions, hidden_size) self.fc2 = nn.Linear(hidden_size, 2) self.attention_size = self.hidden_size self.w_omega = Variable( torch.zeros(self.hidden_size * 
self.num_directions, self.attention_size)) self.u_omega = Variable(torch.zeros(self.attention_size)) self.sequence_length = 100 def attention_net(self, lstm_output, device): output_reshape = torch.Tensor.reshape(lstm_output, [-1, self.hidden_size * self.num_directions]) attn_tanh = torch.tanh(torch.mm(output_reshape, self.w_omega.to(device))) attn_hidden_layer = torch.mm( attn_tanh, torch.Tensor.reshape(self.u_omega.to(device), [-1, 1])) exps = torch.Tensor.reshape(torch.exp(attn_hidden_layer), [-1, self.sequence_length]) alphas = exps / torch.Tensor.reshape(torch.sum(exps, 1), [-1, 1]) alphas_reshape = torch.Tensor.reshape(alphas, [-1, self.sequence_length, 1]) state = lstm_output attn_output = torch.sum(state * alphas_reshape, 1) return attn_output def forward(self, features): inp = features # inp = features[2] self.sequence_length = inp.shape[1] out, _ = self.lstm(inp) out = self.attention_net(out, self.device) out = self.fc1(out) out = self.fc2(out) return out if __name__ == "__main__": device = torch.device('cuda:0') model = robustlog(300,10,2,device=device).to(device) inp = torch.ones((64,20,300)).to(device) output = model(inp) print(output.shape) serial-semi-main.py已删除 100644 → 0 +0 −123 原始行号 差异行号 差异行 from data_loader import AliyunDataLoader from model import robustlog import torch import torch.optim as optim import torch.nn as nn from torch.utils.data import DataLoader import os import numpy as np from sklearn.metrics import classification_report, accuracy_score classes=2 batch_size = 64 learning_rate = 0.001 epoch = 10 device = torch.device('cuda:0') out_dir = "0916" output = f"output/{out_dir}_{epoch}e_{learning_rate}lr_{batch_size}bs" save_dir = os.path.join(output,"model_save") if not os.path.exists(output): os.mkdir(output) if not os.path.exists(save_dir): os.mkdir(save_dir) IS_TRAIN = True def load_datasets(): train_db = AliyunDataLoader(mode='train') test_db = AliyunDataLoader(mode='test') train_loader = 
DataLoader(train_db,batch_size=batch_size,shuffle=True) test_loader = DataLoader(test_db,batch_size=batch_size) return train_loader,test_loader def cal_f1(label_list, pred_list): # label_arr = np.array(label_list) # pred_arr = np.array(pred_list) # 异常检测 print("异常检测结果:") print(classification_report(label_list,pred_list)) return classification_report(label_list,pred_list) def main(): train_loader,test_loader = load_datasets() model = robustlog(300,10,2,device=device).to(device) optimizer = optim.SGD(model.parameters(), lr=learning_rate) criteon = nn.CrossEntropyLoss().to(device) f = open(os.path.join(output,"log.txt"),'w') if IS_TRAIN: # save model parameter torch.save(model.state_dict(),os.path.join(save_dir,f"model_param.pkl")) best_f1_score = 0 for e in range(epoch): print("*"*5,e+1,"*"*5) #train model.train() total_loss = 0 for batch_idx, (data, target) in enumerate(train_loader): data, target = data.to(device), target.to(device) # print(data.shape) logits = model(data) loss = criteon(logits, target) optimizer.zero_grad() loss.backward() optimizer.step() total_loss += loss.item() print('Train Epoch: [{}/{}]\tLoss: {:.6f}'.format( e+1, epoch, total_loss)) #valid model.eval() test_loss = 0 y_true = [] y_pred = [] for data, target in test_loader: y_true.extend(target) data, target = data.to(device), target.to(device) logits = model(data) test_loss += criteon(logits, target).item() pred = logits.data.topk(1)[1].flatten().cpu() y_pred.extend(pred) # test_loss /= len(test_loader.dataset) # F1_Score = f1_score(y_true, y_pred) F1_Score = accuracy_score(y_true, y_pred) print('\nVALID set: Average loss: {:.4f},F1-score:{}\n'.format( test_loss,round(F1_Score.item(),4))) f.write('\nVALID set: Average loss: {:.4f},F1-score:{}\n'.format( test_loss,round(F1_Score.item(),4))) if F1_Score>best_f1_score: best_f1_score = F1_Score torch.save(model.state_dict(),os.path.join(save_dir,f"model_param.pkl")) model.load_state_dict(torch.load(os.path.join(save_dir,f"model_param.pkl"))) 
pred_list = [] label_list = [] model.eval() for data, target in test_loader: data, target = data.to(device), target.to(device) logits = model(data) pred = logits.data.topk(1)[1].flatten() pred_list.extend(list(pred.cpu())) label_list.extend(list(target.cpu())) res = cal_f1(label_list, pred_list) f.write(res) f.close() if __name__=='__main__': main() No newline at end of file test已删除 100644 → 0 +0 −0 Empty file deleted. 加载中
"""Dataset loaders that map log event-id sequences to word-vector sequences.

Each dataset reads a pre-dumped (x, y) pair from disk, binarizes the labels
(non-zero -> 1), takes the first 80% of samples as the 'train' split and the
remainder as the 'test' split, and resolves event ids to word vectors at
__getitem__ time through a pickled EventId -> vector lookup table.
"""
from torch.utils.data import Dataset
import numpy as np
import torch  # noqa: F401 -- kept from the original module
import os  # noqa: F401 -- kept from the original module
import pickle

# Fraction of samples used for the 'train' split; the rest is 'test'.
_TRAIN_FRACTION = 0.8


class _EventVectorDataset(Dataset):
    """Shared split / label-binarization / vector-lookup logic.

    The three public dataset classes below only differ in *where* they load
    their arrays from; everything else was triplicated in the original file
    and is factored here.
    """

    def _init_split(self, x, y, mode, vector_pickle):
        """Binarize labels, slice the split, and load the id->vector map.

        x: per-sample sequences of event ids.
        y: integer labels; any non-zero value is treated as an anomaly.
        mode: 'train' -> first 80% of samples; anything else -> last 20%.
        vector_pickle: path of the pickled EventId -> word-vector dict.
        """
        # Anomaly detection is binary: normal = 0, any anomaly class = 1.
        y[y != 0] = 1
        cut = int(len(y) * _TRAIN_FRACTION)
        if mode == 'train':
            self.x, self.y = x[:cut], y[:cut]
        else:
            self.x, self.y = x[cut:], y[cut:]
        # 'rb' (was the unconventional but equivalent 'br' in the original)
        with open(vector_pickle, 'rb') as f:
            self.EventId2WordVecter = pickle.load(f)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, index):
        # Resolve each event id of the sample to its word vector.
        vectors = [self.EventId2WordVecter[eid] for eid in self.x[index]]
        return np.array(vectors).astype(np.float32), self.y[index]


class AliyunDataLoaderForFed(_EventVectorDataset):
    """Per-client Aliyun shard for federated training (client id = rank-1).

    world_size is accepted but unused -- kept for interface compatibility.
    """

    def __init__(self, mode='train', semi=False, rank=1, world_size=3, num_keys=219) -> None:
        super().__init__()
        self.num_keys = num_keys
        x = np.load("../Fedlog/data/data_{}.npy".format(rank - 1))
        if semi:
            # semi-supervised labels were produced by a separate pipeline
            y = np.load("../Fedlog/data/semi_label_{}.npy".format(rank - 1))
        else:
            y = np.load("../Fedlog/data/label_{}.npy".format(rank - 1))
        self._init_split(x, y, mode, "EventId2WordVecter.pickle")


class AliyunDataLoader(_EventVectorDataset):
    """Single-machine (non-federated) Aliyun dataset."""

    def __init__(self, mode='train') -> None:
        super().__init__()
        x = np.load("data/data.npy")
        y = np.load("data/label.npy")
        self._init_split(x, y, mode, "EventId2WordVecter.pickle")


class CMCCDataLoaderForFed(_EventVectorDataset):
    """Per-client CMCC shard for federated training (client id = rank-1).

    world_size is accepted but unused -- kept for interface compatibility.
    """

    def __init__(self, mode='train', semi=False, rank=1, world_size=3, num_keys=144) -> None:
        super().__init__()
        self.num_keys = num_keys
        data_path = "/home/zhangshenglin/chezeyu/log/cmcc_0929/data"
        x = np.load("{}/eventIndex_{}.npy".format(data_path, rank - 1))
        if semi:
            y = np.load("{}/semi_label_{}.npy".format(data_path, rank - 1))
        else:
            y = np.load("{}/label_{}.npy".format(data_path, rank - 1))
        self._init_split(x, y, mode, "EventId2WordVecter_cmcc.pickle")
"""Federated (FedAvg) training entry point for the RobustLog model.

Each of the world_size-1 clients trains a local copy of the model on its own
data shard; after every global round the client weights are averaged with
FedAvg and broadcast back to all clients.  Per-client validation accuracy
selects the best checkpoint, which is finally evaluated on that client's
test split.
"""
from data_loader import AliyunDataLoaderForFed,CMCCDataLoaderForFed
from model import robustlog
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader
import os
import copy
import numpy as np
from sklearn.metrics import classification_report, accuracy_score
import pathlib  # noqa: F401 -- kept from the original module
import argparse


def _str2bool(value):
    """Parse a boolean CLI flag.

    argparse's ``type=bool`` treats ANY non-empty string (even "False") as
    True; this helper makes ``--is_train False`` actually mean False.
    """
    return str(value).lower() in ('1', 'true', 'yes', 'y')


parser = argparse.ArgumentParser()
parser.add_argument('--seed', type=int, default=2020)
parser.add_argument('--classes', type=int, default=2)
parser.add_argument('--batch_size', type=int, default=64)
parser.add_argument('--learning_rate', type=float, default=0.001)
parser.add_argument('--global_epochs', type=int, default=10)
parser.add_argument('--local_epochs', type=int, default=10)
parser.add_argument('--gpu_id', type=int, default=0)
parser.add_argument('--embed_dim', type=int, default=768)
parser.add_argument('--world_size', type=int, default=6)
parser.add_argument('--out_dir', type=str, default='0917')
parser.add_argument('--datatype', type=str, default='aliyun')
# bugfix: was ``type=bool``, which made "--is_train False" evaluate to True
parser.add_argument('--is_train', type=_str2bool, default=True)
# parser.add_argument('--is_semi', type=int, default=0)
args = parser.parse_args()

seed = args.seed
classes = args.classes
batch_size = args.batch_size
learning_rate = args.learning_rate
global_epochs = args.global_epochs
local_epochs = args.local_epochs
device = torch.device('cuda:{}'.format(args.gpu_id))
embed_dim = args.embed_dim
world_size = args.world_size
out_path = args.out_dir
datatype = args.datatype
is_train = args.is_train
# is_semi = True if args.is_semi==1 else False
is_semi = False
out_dir = f"output/fedlog_{datatype}_logs_{out_path}_{global_epochs}e_{local_epochs}locEpoch_{learning_rate}lr_{batch_size}bs_{world_size}ws_{is_semi}semis"
save_dir = os.path.join(out_dir, "model_save")


def torch_seed(seed):
    """Seed python/numpy/torch RNGs and force deterministic cuDNN."""
    import random
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


def load_datasets_aliyun(rank, world_size):
    """Build (train, val, test) loaders for Aliyun client ``rank``."""
    train_db = AliyunDataLoaderForFed(mode='train', semi=False, rank=rank, world_size=world_size)
    test_db = AliyunDataLoaderForFed(mode='test', semi=False, rank=rank, world_size=world_size)
    print(rank, len(train_db), len(test_db))
    train_loader = DataLoader(train_db, batch_size=batch_size, shuffle=True, drop_last=True)
    test_loader = DataLoader(test_db, batch_size=batch_size)
    val_loader = test_loader  # NOTE(review): validation reuses the test split
    return train_loader, val_loader, test_loader


def load_datasets_cmcc(rank, world_size):
    """Build (train, val, test) loaders for CMCC client ``rank``."""
    train_db = CMCCDataLoaderForFed(mode='train', semi=False, rank=rank, world_size=world_size)
    test_db = CMCCDataLoaderForFed(mode='test', semi=False, rank=rank, world_size=world_size)
    train_loader = DataLoader(train_db, batch_size=batch_size, shuffle=True, drop_last=True)
    test_loader = DataLoader(test_db, batch_size=batch_size)
    val_loader = test_loader  # NOTE(review): validation reuses the test split
    return train_loader, val_loader, test_loader


# Federated averaging: FedAvg
def FedAvg(w):
    """Return the element-wise mean of a list of model state_dicts."""
    w_avg = copy.deepcopy(w[0])
    for k in w_avg.keys():
        for i in range(1, len(w)):
            w_avg[k] += w[i][k]
        w_avg[k] = torch.div(w_avg[k], len(w))
    return w_avg


def cal_f1(label_list, pred_list, fw=None):
    """Print (and optionally write to ``fw``) a binary anomaly report.

    Labels/predictions are collapsed to binary: any positive value -> 1.
    """
    label_arr = np.array(label_list)
    pred_arr = np.array(pred_list)
    # 异常检测 (anomaly detection): collapse any positive class to 1
    ad_label = np.where(label_arr > 0, 1, 0)
    ad_pred = np.where(pred_arr > 0, 1, 0)
    print("异常检测结果:")
    print(classification_report(ad_label, ad_pred))
    if fw:
        fw.write("异常检测结果:\n")
        fw.write(classification_report(ad_label, ad_pred))
        fw.write('\n\n')


if __name__ == '__main__':
    torch_seed(seed)
    print(out_dir, '\n', save_dir)
    if datatype == 'aliyun':
        loader_list = [load_datasets_aliyun(i, world_size) for i in range(1, world_size)]
    else:
        loader_list = [load_datasets_cmcc(i, world_size) for i in range(1, world_size)]
    client_list = [robustlog(300, 10, 2, device=device).to(device) for i in range(1, world_size)]
    best_score_list = [0 for _ in range(world_size - 1)]
    # bugfix: os.mkdir failed when the parent "output/" directory was absent
    os.makedirs(out_dir, exist_ok=True)
    os.makedirs(save_dir, exist_ok=True)
    fw_list = [open("./{}/log{}.txt".format(out_dir, i + 1), 'w') for i in range(0, world_size - 1)]
    criteon = nn.CrossEntropyLoss().to(device)
    # save the initial parameters so a "best so far" checkpoint always exists
    for i, model in enumerate(client_list):
        torch.save(model.state_dict(), os.path.join(save_dir, f"model_param{i}.pkl"))

    for it in range(global_epochs):
        print('*' * 5, it, '*' * 5)
        # ---- local training on every client ----
        for i in range(world_size - 1):
            client_model = client_list[i]
            optimizer_client = optim.SGD(client_list[i].parameters(), lr=learning_rate)
            client_model.train()
            train_loader = loader_list[i][0]
            for e in range(local_epochs):
                print('server', i, 'train epoch:', e)
                for batch_idx, (data, target) in enumerate(train_loader):
                    data, target = data.to(device), target.to(device)
                    target[target != 0] = 1  # binarize labels on the fly
                    logits = client_model(data)
                    loss = criteon(logits, target)
                    optimizer_client.zero_grad()
                    loss.backward()
                    optimizer_client.step()

        # ---- FedAvg aggregation and broadcast back to all clients ----
        avg_state = FedAvg([m.state_dict() for m in client_list])
        for i in range(world_size - 1):
            client_list[i].load_state_dict(avg_state)

        # ---- per-client validation; checkpoint on best accuracy ----
        for i in range(world_size - 1):
            client_model = client_list[i]
            client_model.eval()
            val_loader = loader_list[i][1]
            test_loss = 0
            y_true = []
            y_pred = []
            with torch.no_grad():  # inference only: no autograd bookkeeping
                for data, target in val_loader:
                    target[target != 0] = 1
                    y_true.extend(target)
                    data, target = data.to(device), target.to(device)
                    logits = client_model(data)
                    test_loss += criteon(logits, target).item()
                    pred = logits.data.topk(1)[1].flatten().cpu()
                    y_pred.extend(pred)
            test_loss /= len(val_loader.dataset)
            # NOTE(review): the metric is accuracy, despite "score" wording
            acc = accuracy_score(y_true, y_pred)
            print('\n{},VALID set: Average loss: {:.4f},score:{}\n'.format(
                i, test_loss, round(acc.item(), 4)))
            fw_list[i].write('\n{},VALID set: Average loss: {:.4f},score:{}\n'.format(
                it, test_loss, round(acc.item(), 4)))
            if acc > best_score_list[i]:
                best_score_list[i] = acc
                torch.save(client_model.state_dict(), os.path.join(save_dir, f"model_param{i}.pkl"))

    # ---- final evaluation of each client's best checkpoint ----
    for i in range(world_size - 1):
        print("local server", i)
        # bugfix: the original reused the stale ``client_model`` loop variable
        client_model = client_list[i]
        client_model.load_state_dict(torch.load(os.path.join(save_dir, f"model_param{i}.pkl")))
        client_model.eval()
        test_loader = loader_list[i][2]
        pred_list = []
        label_list = []
        with torch.no_grad():
            for data, target in test_loader:
                target[target != 0] = 1
                data, target = data.to(device), target.to(device)
                logits = client_model(data)
                pred = logits.data.topk(1)[1].flatten()
                pred_list.extend(list(pred.cpu()))
                label_list.extend(list(target.cpu()))
        cal_f1(label_list, pred_list, fw_list[i])
    for f in fw_list:
        f.close()
"""RobustLog: a bidirectional-LSTM + attention classifier over log sequences."""
import torch
import torch.nn as nn
from torch.autograd import Variable  # noqa: F401 -- deprecated no-op wrapper; no longer used below


class robustlog(nn.Module):
    """BiLSTM log-anomaly classifier with attention pooling over time.

    Args:
        input_size: dimension of each event's word vector.
        hidden_size: LSTM hidden size per direction.
        num_layers: number of stacked LSTM layers.
        num_keys: unused here; kept for signature compatibility (the output
            layer is hard-wired to 2 classes).
        device: device the attention tensors are moved to during forward().
    """

    def __init__(self, input_size, hidden_size, num_layers, num_keys=2, device='cpu'):
        super(robustlog, self).__init__()
        self.device = device
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=True, dropout=0.5)
        self.num_directions = 2  # bidirectional
        self.fc1 = nn.Linear(hidden_size * self.num_directions, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 2)
        self.attention_size = self.hidden_size
        # NOTE(review): w_omega / u_omega are plain zero tensors, NOT
        # nn.Parameters -- they are never trained and never saved in
        # state_dict, so the attention below degenerates to uniform mean
        # pooling.  Kept as-is to preserve behavior and checkpoint
        # compatibility.  (The deprecated Variable() wrapper was removed;
        # it has been a no-op since PyTorch 0.4.)
        self.w_omega = torch.zeros(self.hidden_size * self.num_directions,
                                   self.attention_size)
        self.u_omega = torch.zeros(self.attention_size)
        self.sequence_length = 100  # overwritten per batch in forward()

    def attention_net(self, lstm_output, device):
        """Attention pooling over the time dimension.

        lstm_output: (batch, seq_len, hidden*2) -> returns (batch, hidden*2).
        Relies on self.sequence_length having been set to seq_len by forward().
        """
        output_reshape = lstm_output.reshape(-1, self.hidden_size * self.num_directions)
        attn_tanh = torch.tanh(torch.mm(output_reshape, self.w_omega.to(device)))
        attn_hidden_layer = torch.mm(attn_tanh, self.u_omega.to(device).reshape(-1, 1))
        # manual softmax over the sequence axis
        exps = torch.exp(attn_hidden_layer).reshape(-1, self.sequence_length)
        alphas = exps / torch.sum(exps, 1).reshape(-1, 1)
        alphas_reshape = alphas.reshape(-1, self.sequence_length, 1)
        # weighted sum of the LSTM outputs across time
        attn_output = torch.sum(lstm_output * alphas_reshape, 1)
        return attn_output

    def forward(self, features):
        """features: (batch, seq_len, input_size) -> logits of shape (batch, 2)."""
        inp = features
        # attention_net needs the actual sequence length of this batch
        self.sequence_length = inp.shape[1]
        out, _ = self.lstm(inp)
        out = self.attention_net(out, self.device)
        out = self.fc1(out)
        out = self.fc2(out)
        return out


if __name__ == "__main__":
    device = torch.device('cuda:0')
    model = robustlog(300, 10, 2, device=device).to(device)
    inp = torch.ones((64, 20, 300)).to(device)
    output = model(inp)
    print(output.shape)
"""Single-machine (serial) training/evaluation script for RobustLog on Aliyun logs."""
from data_loader import AliyunDataLoader
from model import robustlog
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader
import os
import numpy as np
from sklearn.metrics import classification_report, accuracy_score

# hyper-parameters / run configuration
classes = 2
batch_size = 64
learning_rate = 0.001
epoch = 10
device = torch.device('cuda:0')
out_dir = "0916"
output = f"output/{out_dir}_{epoch}e_{learning_rate}lr_{batch_size}bs"
save_dir = os.path.join(output, "model_save")
# bugfix: os.mkdir failed when the parent "output/" directory was absent
os.makedirs(output, exist_ok=True)
os.makedirs(save_dir, exist_ok=True)
IS_TRAIN = True


def load_datasets():
    """Build train/test DataLoaders over the Aliyun dataset."""
    train_db = AliyunDataLoader(mode='train')
    test_db = AliyunDataLoader(mode='test')
    train_loader = DataLoader(train_db, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_db, batch_size=batch_size)
    return train_loader, test_loader


def cal_f1(label_list, pred_list):
    """Print and return the binary anomaly-detection classification report."""
    # 异常检测 (anomaly detection)
    print("异常检测结果:")
    report = classification_report(label_list, pred_list)
    print(report)
    return report


def main():
    """Train (optionally), checkpoint the best model, then report test metrics."""
    train_loader, test_loader = load_datasets()
    model = robustlog(300, 10, 2, device=device).to(device)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    criteon = nn.CrossEntropyLoss().to(device)
    # context manager guarantees the log file is closed on any exit path
    with open(os.path.join(output, "log.txt"), 'w') as f:
        if IS_TRAIN:
            # save the initial parameters so a best checkpoint always exists
            torch.save(model.state_dict(), os.path.join(save_dir, "model_param.pkl"))
            best_f1_score = 0
            for e in range(epoch):
                print("*" * 5, e + 1, "*" * 5)
                # ---- train ----
                model.train()
                total_loss = 0
                for batch_idx, (data, target) in enumerate(train_loader):
                    data, target = data.to(device), target.to(device)
                    logits = model(data)
                    loss = criteon(logits, target)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    total_loss += loss.item()
                print('Train Epoch: [{}/{}]\tLoss: {:.6f}'.format(
                    e + 1, epoch, total_loss))
                # ---- validate ----
                model.eval()
                test_loss = 0
                y_true = []
                y_pred = []
                with torch.no_grad():  # inference only
                    for data, target in test_loader:
                        y_true.extend(target)
                        data, target = data.to(device), target.to(device)
                        logits = model(data)
                        test_loss += criteon(logits, target).item()
                        pred = logits.data.topk(1)[1].flatten().cpu()
                        y_pred.extend(pred)
                # test_loss /= len(test_loader.dataset)
                # NOTE(review): test_loss is a SUM over batches although the
                # message labels it "Average loss", and the metric is
                # accuracy, not F1.  Messages kept byte-for-byte for log
                # compatibility.
                F1_Score = accuracy_score(y_true, y_pred)
                print('\nVALID set: Average loss: {:.4f},F1-score:{}\n'.format(
                    test_loss, round(F1_Score.item(), 4)))
                f.write('\nVALID set: Average loss: {:.4f},F1-score:{}\n'.format(
                    test_loss, round(F1_Score.item(), 4)))
                if F1_Score > best_f1_score:
                    best_f1_score = F1_Score
                    torch.save(model.state_dict(), os.path.join(save_dir, "model_param.pkl"))
        # ---- final test with the best checkpoint ----
        model.load_state_dict(torch.load(os.path.join(save_dir, "model_param.pkl")))
        pred_list = []
        label_list = []
        model.eval()
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                logits = model(data)
                pred = logits.data.topk(1)[1].flatten()
                pred_list.extend(list(pred.cpu()))
                label_list.extend(list(target.cpu()))
        res = cal_f1(label_list, pred_list)
        f.write(res)


if __name__ == '__main__':
    main()