我们所使用的数据集为IStego100K,训练集中包含100000张图像,其中有33404张是用nsf5嵌密的图片,需要根据数据集提供的标签数据将它们提取出来。
标签大致如下:
parameters={
"000001.jpg":{ # parameters for stego-file
"quality": 95, # quality factor
"rate": 0.4, # embedding rate (payload)
"steg_algorithm": "nsf5" # steganographic algorithm
},
"000002.jpg":{ # parameters for cover-file
"quality": 90 # quality factor
}
}
首先我们需要借助工具将json数据转化为csv文件,代码如下:
import sys
import json
import csv
import xlsxwriter
import os

# Module-level leftovers of the original script, kept so that anything that
# still reads them keeps working.
out = []
arg = ""
i = 0
verbose = False

# Help text printed on any invalid invocation.  The first line was truncated
# in the original source (unterminated string literal); it is reconstructed
# here from what the parser below actually accepts.
USAGE_LINES = (
    "Correct usage is : python jsontoexcel.py <input.json> [output_name] [-v]",
    "Example : python jsontoexcel.py myfile.json",
    "will output two files: myfile.csv myfile.xlsx\n",
    "Whereas : python jsontoexcel.py myfile.json output",
    "will output two files: output.csv output.xlsx",
    "you can use the -v flag for verbose output",
)


def print_usage():
    """Print the command-line help, one line per ``print`` call."""
    for line in USAGE_LINES:
        print(line)


def default_output_base(json_path):
    """Derive the output base name (no extension) from the input file name.

    ``myfile.json`` becomes ``./myfile``.  A path that already has a
    directory part (``labels/train.json``, ``./train.json``) only loses its
    extension, so the outputs land next to the input instead of collapsing
    to an empty name as ``split(".")[0]`` did for ``./train.json``.
    """
    base = os.path.splitext(json_path)[0]
    if os.path.dirname(base):
        return base
    return "./" + base


def parse_cli_args(argv):
    """Interpret ``argv`` (including the script name at index 0).

    Accepted forms are one or two positional arguments (input JSON file and
    optional output base name) plus an optional ``-v`` flag in any position.

    Returns:
        ``(file_dir, out_file, verbose)`` on success, or ``None`` when the
        invocation is invalid and the usage text should be shown.
    """
    args = list(argv[1:])
    # The original script handled at most three arguments after the name.
    if len(args) > 3:
        return None
    # Exact match only: a substring test would mistake names such as
    # "data-v1.json" for the flag.  argv itself is never mutated.
    positional = [a for a in args if a != "-v"]
    want_verbose = len(positional) != len(args)
    if len(positional) == 1:
        return positional[0], default_output_base(positional[0]), want_verbose
    if len(positional) == 2:
        return positional[0], positional[1], want_verbose
    return None


# parse input parameters
# Guarded so that importing this module never terminates the interpreter;
# when run as a script the behavior is the same as before, except that every
# invalid invocation now exits after printing the usage text.
if __name__ == "__main__":
    parsed_args = parse_cli_args(sys.argv)
    if parsed_args is None:
        print_usage()
        sys.exit(-1)
    fileDir, outFile, verbose = parsed_args
#flattens a tree object consisted of dictionaries and lists
def flatten_json(y):
    """Flatten every record of the JSON root into one table row.

    Args:
        y: Parsed JSON root.  For a dict, each *value* becomes one row (the
            top-level keys themselves are not emitted); for a list, each
            element becomes one row.  Any other root yields no rows.

    Returns:
        A list of rows.  The first row is the header: the longest label list
        produced by any record (an empty list when there are no records).
        The remaining rows are the flattened leaf values of each record, in
        input order.

    NOTE: rows are not padded or re-ordered to match the header, so records
    with different key sets only line up when the shorter ones are a prefix
    of the longest one.
    """
    print("flattening json file recursivelly")
    # Module-level counter reset kept from the original; nothing in this
    # function reads it.
    global count
    count = 0

    # Both container kinds are handled by one loop; only the iteration
    # source differs.
    if isinstance(y, dict):
        records = y.values()
    elif isinstance(y, list):
        records = y
    else:
        records = []

    rows = []
    labels = []
    #flatten each row of the root list
    for record in records:
        values, record_labels, _leaf_count = flatten(record, ' ')
        if verbose:
            print("Sub tree:" + str(values))
        labels.append(record_labels)
        rows.append(values)

    #find max path in json tree
    # default=[] keeps empty input from raising ValueError in max().
    header = max(labels, key=len, default=[])
    if verbose:
        print("labels:" + str([header]))
    table = [header] + rows
    if verbose:
        print(table)
    return table
labels = []


#explore a tree with recursion and flatten to list
def flatten(x, name):
    """Recursively flatten a nested dict/list structure into parallel lists.

    Args:
        x: A dict, a list, or a leaf value.
        name: Path prefix accumulated so far.  Each dict key or list index
            on the way down is appended followed by ``'/'``.

    Returns:
        A tuple ``(values, labels, count)``: the leaf values in traversal
        order, the path label of each leaf, and the number of leaves.  Empty
        dicts and lists contribute no leaves.
    """
    if isinstance(x, dict):
        children = x.items()
    elif isinstance(x, list):
        children = enumerate(x)
    else:
        # Leaf: a single value labelled with the path that led to it.
        return [x], [name], 1

    out = []
    label = []
    count = 0
    for key, child in children:
        # str() covers list indices and any non-string dict key.
        values, names, leaves = flatten(child, name + str(key) + '/')
        out += values
        label += names
        count += leaves
    return out, label, count
def load_json(path):
    """Parse the JSON file at ``path`` and return the resulting object.

    The file is decoded as UTF-8; a leading byte-order mark, if present, is
    skipped so that files saved by Windows editors load as well.
    """
    with open(path, encoding='utf-8-sig') as file:
        return json.load(file)


def save_csv(rows, base_name):
    """Write ``rows`` to ``base_name + ".csv"``.

    The first line is ``SEP=,`` (a separator hint), followed by one CSV
    record per row.  The file is closed even if writing fails.
    """
    with open(base_name + ".csv", 'w', newline='') as data_csv:
        data_csv.write('SEP=,\n')
        csv.writer(data_csv).writerows(rows)


def save_xlsx(rows, base_name):
    """Write ``rows`` to ``base_name + ".xlsx"`` with a bold first row.

    Cells of the first row are written as-is with bold formatting; every
    other cell is written as the string form of its value.
    """
    workbook = xlsxwriter.Workbook(base_name + '.xlsx')
    worksheet = workbook.add_worksheet()
    bold = workbook.add_format({'bold': True})
    for r, row in enumerate(rows):
        for c, col in enumerate(row):
            if r == 0:
                worksheet.write(r, c, col, bold)
            else:
                worksheet.write_string(r, c, str(col))
    workbook.close()


def main():
    """Convert the JSON file named by ``fileDir`` into CSV and XLSX files.

    Reads the module-level ``fileDir``, ``outFile`` and ``verbose`` set by
    the argument parsing above.
    """
    #open json file
    print("Loading json file")
    all_data = load_json(fileDir)
    # The raw dump can be very large, so it is only shown in verbose mode.
    if verbose:
        print(all_data)
    #flatten data
    flat = flatten_json(all_data)
    #create csv with flattened data
    print("Saving data as "+outFile+".csv")
    save_csv(flat, outFile)
    #save data as xlsx
    print("Saving data as "+outFile +'.xlsx')
    save_xlsx(flat, outFile)
    print("Successfully created files:"+outFile +'.xlsx , ' + outFile+".csv" )


if __name__ == "__main__":
    main()
运行命令 python JsonToExcel.py <标签json文件>(脚本至少需要传入一个json文件路径作为参数,否则只会打印用法并退出),例如:python JsonToExcel.py train.json
此时生成如下csv文件
再通过关键字匹配找到steg_algorithm=“nsf5”的图像id,将其对应图像提取出来,代码如下:
import os
import shutil
import pandas as pd
import random
# Open the label table, select the stego images and move them
def move_stego_images(csv_path, image_dir, target_dir, algorithm="nsf5"):
    """Move every image whose label row names ``algorithm`` into ``target_dir``.

    Args:
        csv_path: CSV label table with an ``id`` column (numeric image
            number) and a ``steg_algorithm`` column.
        image_dir: Directory holding the images, named as the zero-padded
            6-digit id plus ``.jpg``.
        target_dir: Destination directory; created if it does not exist, so
            the first move cannot turn an image into a file with the
            directory's name.
        algorithm: Value of ``steg_algorithm`` to select.

    Returns:
        The file names that were moved, in table order.
    """
    # Passing the path lets pandas open and close the file itself.
    table = pd.read_csv(csv_path)
    selected = table[table["steg_algorithm"] == algorithm]
    os.makedirs(target_dir, exist_ok=True)

    moved = []
    for image_id in selected["id"].tolist():
        # Zero-pad the image number to 6 digits to match the file names.
        stem = '{:06d}'.format(int(image_id))
        print(stem)
        file_name = stem + '.jpg'
        shutil.move(os.path.join(image_dir, file_name), target_dir)
        moved.append(file_name)
    return moved


if __name__ == "__main__":
    move_stego_images(
        "C:/Users/hp/PycharmProjects/pythonProject2/train.csv",
        'D:/实验资源/IStego100K/',
        'D:/实验资源/IStego100K/' + '1',
    )
    print("完成")
任务完成!
好文链接
发表评论