-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit b661ad0
Showing
232 changed files
with
215,547 additions
and
0 deletions.
There are no files selected for viewing
10,000 changes: 10,000 additions & 0 deletions
10,000
PHF_benchmark/Input_data/0/key_0000_1_10000_8__000000.txt
Large diffs are not rendered by default.
Oops, something went wrong.
10,000 changes: 10,000 additions & 0 deletions
10,000
PHF_benchmark/Input_data/1/key_0000_1_10000_8__000000.txt
Large diffs are not rendered by default.
Oops, something went wrong.
10,000 changes: 10,000 additions & 0 deletions
10,000
PHF_benchmark/Input_data/2/key_0000_1_10000_8__000000.txt
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
import sys | ||
import math | ||
import os | ||
import random | ||
import numpy as np | ||
|
||
# Key-generation parameters (adapt the dimension-1 logic for other test_0000_x.py variants).
# In this file, dimension-1 is an IPv6-style address treated as 8 hex digits,
# eg: "x.x.x.x.x.x.x.x", where each "x" is a value from 0 to 15 (4 bits).
# The "." separators are illustrative only and do not appear in the key.
# Total number of possible IP addresses is: d1 = 16**8 = 2**32
# The function rand_ip(8) packs these 8 digits into a single integer;
# the maximum integer rand_ip(8) can return is (2**32 - 1).
max_ip_num = 16  # exclusive upper bound of each randomly drawn digit
# Set n value first
n = 10000  # each set has n input keys
z = 8  # digits per key; output files are named key_0000_1_<n>_<z>__<itr>.txt

# set the total number of key sets (one output folder per set)
max_key_set = 1000
# set the total number of test files written per key set
max_itr = 1
|
||
def rand_ip(l):
    """Return a random integer assembled from ``l`` base-16 digits.

    Each digit is ``int(random.uniform(0, max_ip_num))``: a uniform draw
    truncated to an integer. Digits are packed most-significant first by
    multiplying the running value by 16. One digit is always drawn, even
    when ``l`` is smaller than 1.

    In hardware the input IPs are always binary data, so no cast between
    data types is needed there; this conversion exists only for the
    software generator.
    """
    def next_digit():
        # One uniformly distributed digit in [0, max_ip_num).
        return int(random.uniform(0, max_ip_num))

    value = next_digit()
    for _ in range(l - 1):
        value = value * 16 + next_digit()
    return value
# # numpy random | ||
# def np_rand_ip(l): | ||
# tmp_ip = numpy.random.uniform(0,high=max_ip_num,dtype='uint32') | ||
# for s in range(l-1): | ||
# tmp_ip = tmp_ip*16 + numpy.random.uniform(0,high=max_ip_num,dtype='uint32') | ||
# return tmp_ip | ||
|
||
if __name__=='__main__':
    print('begin')
    # Build max_key_set independent key sets, one output folder per set.
    for folder_name in range(max_key_set):
        cur_dir = "./Data_%d/%d"%(n,folder_name)
        # exist_ok avoids the check-then-create race and is a no-op on reruns.
        os.makedirs(cur_dir, exist_ok=True)
        ip_list = []  # keys in generation order (shuffled before each write)
        seen = set()  # the same keys, kept for O(1) duplicate detection
        # Keep drawing until n distinct keys are collected; duplicates are dropped.
        while len(ip_list) < n:
            tmp_ip = str(rand_ip(z))
            if tmp_ip not in seen:
                seen.add(tmp_ip)
                ip_list.append(tmp_ip)
        print('size of ip set is: ', len(ip_list))
        print('writing to txt file for folder %d...'%(folder_name))
        # Each iteration writes the same key set in a fresh random order,
        # one key per line.
        for itr in range(max_itr):
            random.shuffle(ip_list)
            out_path = "%s/key_0000_1_%d_%d__%s.txt"%(cur_dir,n,z,str(itr).zfill(6))
            # The context manager closes the file even if the write fails.
            with open(out_path,'w') as fout:
                fout.write('\n'.join(ip_list))
    print('finish all folders.')
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# PHF benchmark | ||
This is the benchmark code for (minimum) perfect hash function (PHF/MPHF) algorithms. BBHash, CHD, BDZ and SPHF are compared in terms of construction time and theoretical storage cost. | ||
|
||
Each algorithm must be compiled first in its own folder. Please refer to the README.md in each subfolder. | ||
## bbhash | ||
The BBHash algorithm is an MPHF generator mainly designed for massive key sets. | ||
|
||
## cmph | ||
cmph is a PHF/MPHF library with multiple different algorithms. We use the CHD and BDZ algorithms in the experiment. | ||
|
||
## Satellite Perfect Hash Function (SPHF) | ||
SPHF is a PHF designed for satellite applications. The construction of the hash function is based on searching for suitable parameters in the helper array. | ||
For more details, please refer to the paper. | ||
|
||
## SPHF-RL | ||
SPHF-RL is a PHF optimized according to hardware features, mainly for input key sets of size 10000 or smaller. | ||
|
||
# Dependencies | ||
Ubuntu 18.04, g++, python3, cmake | ||
|
||
# Input data generation | ||
The Input_data folder contains the inputs used for generating PHF/MPHF functions and for testing the correctness of those functions. | ||
|
||
`python3 generate_test_data.py` | ||
|
||
The python3 program generates 1000 input files, each placed in its own numbered folder. Folders 0, 1 and 2 are included as examples. | ||
By default, each input key set contains 10000 keys; change `n` on line 16 of generate_test_data.py for other input sizes. | ||
|
||
# PHF Result | ||
Construction phase: | ||
|
||
1, For construction time, 1000 experiments are conducted. The max time and average time are calculated. | ||
|
||
2, For storage cost, source code of algorithms is analyzed and storage is estimated with the consideration of data structures used in construction steps. | ||
Optimizing the existing algorithms (BBHash, CHD, BDZ) for hardware platforms remains a future research direction. | ||
|
||
3, For function size, assume input are 10000 32-bit keys: | ||
|
||
BBHash: Estimation based on 2 levels of hash. For input, we allocate a buffer with size 32*10000=320000 bit. The conflict mark array for each level needs 10000 bit. Total is 340000 bit. | ||
|
||
CHD_PH, BDZ_PH: The storage is directly calculated by checking the size of function_file. | ||
|
||
SPHF: The storage is estimated with consideration of D[]. | ||
|
||
# RL Result (10000 32-bit input IPs) | ||
The analysis is similar to PHF results. | ||
|
||
RL for these algorithms needs an output array with size m*2 bit (I[]). | ||
|
||
For construction time, an Epsilon time is added. | ||
|
||
For storage cost, m*2bit is added. | ||
|
||
For function space, m*2bit is added. | ||
|
||
Method |AVG time(ms) |MAX time(ms) |Storage(Kb) |Function Space(Kb) | ||
-------------|------------------|:-------------:|:-------------:|:---------: | ||
BDZ-PH-RL |3.73+e |6.85+e |3457 |39.6 | ||
CHD-PH-RL |2.69+e |4.79+e |957 |30 | ||
BBHash-RL |7.64+e |28.62+e |352 |39.2 | ||
SPHF-RL |3.65+e |5.96+e |538 |50 | ||
|
||
# Citation | ||
The current work is for IEEE TRANSACTIONS ON AEROSPACE AND ELECTRONIC SYSTEMS. | ||
Any questions, please contact [email protected] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"files.associations": { | ||
"iosfwd": "cpp" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
//--------------------------------------------------------------------------- | ||
#ifndef AllocatorH | ||
#define AllocatorH | ||
//--------------------------------------------------------------------------- | ||
#include <stddef.h> | ||
//--------------------------------------------------------------------------- | ||
#pragma pack(push,1) | ||
//--------------------------------------------------------------------------- | ||
// Allocation constants.
// acDefAlign is the default value of the `align` argument of
// TAllocator::Malloc / TAllocator::Realloc: 32 everywhere except Borland
// compilers, where it is sizeof(int).
enum TAllocConsts
{
#ifndef __BORLANDC__
    acDefAlign = 32
#else
    acDefAlign = sizeof(int)
#endif
};
//--------------------------------------------------------------------------- | ||
class TAllocator | ||
{ | ||
public: | ||
char *Malloc(size_t size,int align=acDefAlign); | ||
char *Realloc(void *mem,size_t size,int align=acDefAlign,size_t oldSize=0); | ||
void Free(void *mem); | ||
}; | ||
//--------------------------------------------------------------------------- | ||
extern TAllocator defAllocator; | ||
//--------------------------------------------------------------------------- | ||
//#if !defined(WIN32) && !defined(_WIN32) | ||
//#include <linux/vmalloc.h> | ||
//#else | ||
// Declarations only; the definitions are not part of this header.
// NOTE(review): presumably vmalloc() hands out `sz` bytes and vfree()
// releases them -- confirm against the definitions.
void* vmalloc(size_t sz);
void  vfree(void* mem);
//#endif//WIN32
//---------------------------------------------------------------------------
// NOTE(review): the names suggest a two-step scheme (reserve a range of `sz`
// bytes, then commit `sz` bytes at `ptr`, with the bool reporting success) --
// confirm against the definitions.
void* vmallocReserve(size_t sz);
bool  vmallocCommit(void* ptr, size_t sz);
//--------------------------------------------------------------------------- | ||
#pragma pack(pop) | ||
//--------------------------------------------------------------------------- | ||
#endif //Exclusive Include |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
//---- -------------------------------------------------------------------------------------------------------- | ||
#include "Allocator.h" | ||
//---- -------------------------------------------------------------------------------------------------------- | ||
// Definition of the global allocator object that Allocator.h declares as
// `extern TAllocator defAllocator;`.
TAllocator defAllocator;
//---- -------------------------------------------------------------------------------------------------------- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
//--------------------------------------------------------------------------- | ||
#include "Allocator.h" | ||
#include <malloc.h> | ||
#include <string.h> | ||
#include <assert.h> | ||
#include "embdDbg.h" | ||
//--------------------------------------------------------------------------- | ||
// If there is no suitable allocator, use the default C allocator. | ||
//--------------------------------------------------------------------------- | ||
char *TAllocator::Malloc(size_t size,int align) | ||
{ | ||
logEmbdMsg("Prepare alloc %d-byte memory of align %d.\r\n",(int)size,align); | ||
// #ifndef __LSeRTOS__ | ||
// assert(align<=(int)sizeof(int)); | ||
// #endif // __LSeRTOS__ | ||
return (char*)malloc(size); | ||
} | ||
//--------------------------------------------------------------------------- | ||
char *TAllocator::Realloc(void *mem,size_t size,int align,size_t oldSize) | ||
{ | ||
logEmbdMsg("Realloc %d-byte memory of align %d.\r\n",(int)size,align); | ||
#ifndef __LSeRTOS__ | ||
if(align>(int)sizeof(void *)) | ||
{ | ||
char *p = Malloc(size,align); | ||
if(p==NULL) | ||
return NULL; | ||
//This is WRONG. We should query the actual size of mem. But we havn't that interface yet. | ||
if(oldSize==0) | ||
oldSize = size; | ||
memcpy(p,mem,oldSize); | ||
Free(mem); | ||
return p; | ||
} | ||
#endif // __LSeRTOS__ | ||
|
||
return (char*)realloc(mem,size); | ||
} | ||
//--------------------------------------------------------------------------- | ||
// Releases `mem` by handing it straight to the C runtime's free().
void TAllocator::Free(void *mem)
{
    free(mem);
}
//--------------------------------------------------------------------------- |
Oops, something went wrong.