Improved signature file reader for the front payload signature matcher (hopefully no more segfaults caused by empty lines, etc.)

Example configuration for payload signature matcher


git-svn-id: file:///Users/braun/svn/vermont/branches/vermont/dtls-merge@2548 aef3b71b-58ee-0310-9ba9-8811b9f0742f
master
muenz 2010-09-28 13:21:21 +00:00
parent 7706521562
commit 9abc64c7d6
8 changed files with 191 additions and 136 deletions

View File

@ -0,0 +1,15 @@
<!-- Example configuration: an IPFIX collector combined with the front payload signature matcher. -->
<ipfixConfig>
<!-- Collector that listens for incoming data on UDP port 1500. -->
<ipfixCollector id="1">
<listener>
<transportProtocol>UDP</transportProtocol>
<port>1500</port>
</listener>
<!-- Presumably the id of the module that receives the collector output (matches id="5" below); confirm against the config schema. -->
<next>5</next>
</ipfixCollector>
<!-- Signature matcher; signaturedir names the directory holding the signature files. -->
<!-- NOTE(review): the path is relative, so it presumably depends on the working directory at startup; confirm. -->
<frontPayloadSigMatcher id="5">
<signaturedir>./configs/signatures</signaturedir>
</frontPayloadSigMatcher>
</ipfixConfig>

10
configs/signatures/HTTP Normal file
View File

@ -0,0 +1,10 @@
TOKEN
HTTP
SUPPORT
0.50000
TOKEN
GET
SUPPORT
0.50000
THRESHOLD
1.0

View File

@ -0,0 +1,12 @@
SIGNATUREID
HTTP reply
TOKEN
HTTP
SUPPORT
0.50000
TOKEN
200 OK
SUPPORT
0.50000
THRESHOLD
1.0

View File

@ -47,8 +47,7 @@ void FrontPayloadSigMatcher::matchConnection(Connection* conn)
for (int32_t j=0; j<sigmatcher->numOfClasses; j++) {
if (results[j]==1) {
msg(MSG_DIALOG, "SIGMATCHER: matched record on class '%s' in token %u",
sigmatcher->signatures[j]->id, j);
msg(MSG_DIALOG, "SIGMATCHER: front payload matches signature '%s'", sigmatcher->signatures[j]->id);
msg(MSG_DIALOG, "%s", conn->toString().c_str());
}
}

View File

@ -24,7 +24,7 @@
struct Matcher * new_matcher(const char* signature_directory){
int j,i;
/* Read in the signatures of the signature directory */
int numOf_SignatureClasses = 0;
int numOf_SignatureClasses = 0; // Gerhard: unclear why this is called number of signatures classes, it is the number of signatures
struct BayesSignature ** signatures = read_signature_files(signature_directory, &numOf_SignatureClasses);
/* Initialize the matcher with those read-in signatures */
struct Matcher * matcher;
@ -167,12 +167,12 @@ int * match_single_flow(struct Matcher * matcher, const char *flow, int length,
output = 1;
}
if(classwise_matching_sums[i] >= matcher->mapper->classwise_thresholds[i]) {
printf("class %d matched\n", i);
//printf("class %d matched\n", i);
index_tokens = 0;
for (k = 0; k < matcher->numOfClasses; ++k) {
for (j = 0; j < matcher->mapper->classwise_numOfTokens[k]; ++j) {
if (k==i && binary_matches_tokenwise[index_tokens]>0) {
printf(" `- token %d matched\n", j);
//printf(" `- token %d matched\n", j);
}
index_tokens++;
}

View File

@ -30,8 +30,8 @@ struct BayesSignature * new_Signature_i(int maxNumOfTokens, char * filename){
signature->tokens = (struct Token **)malloc(maxNumOfTokens*sizeof(struct Token *));
signature->id = malloc(sizeof(char)*(ID_STRING_SIZE));
filename[ID_STRING_SIZE] = 0;
strcpy(signature->id, filename);
strncpy(signature->id, filename, ID_STRING_SIZE);
signature->id[ID_STRING_SIZE-1] = '\0';
signature->classinformation = new_signatureclass();
signature->numOfTokens = 0;
signature->maxNumOfTokens = maxNumOfTokens;
@ -40,6 +40,15 @@ struct BayesSignature * new_Signature_i(int maxNumOfTokens, char * filename){
return signature;
}
/**
 * Remove carriage return/line feed characters from the end of a string.
 *
 * Only the trailing run of '\r' and '\n' characters is stripped. Line breaks
 * embedded earlier in the string (e.g. inside a token that spans several
 * lines) are preserved.
 *
 * @param str  NUL-terminated string, modified in place; NULL is tolerated.
 * @return     str itself, so the call can be nested in an expression such as
 *             strlen(removeCRLF(line)).
 */
char* removeCRLF(char* str) {
	size_t len;

	if (str == NULL)
		return str;

	/* Walk backwards and stop at the first character that is not CR/LF.
	 * Continuing past it would overwrite embedded line breaks with '\0'
	 * and silently truncate the string at its first line break. */
	len = strlen(str);
	while (len > 0 && (str[len-1] == '\n' || str[len-1] == '\r'))
		str[--len] = '\0';

	return str;
}
/* Constructor for a signature, based on a the full filename (i.e. signature.sig) */
struct BayesSignature * new_Signature_s(const char * absolute_path, char * filename_sig){
FILE * fp;
@ -51,9 +60,9 @@ struct BayesSignature * new_Signature_s(const char * absolute_path, char * filen
snprintf(absolute_filename,299, "%s/%s", absolute_path, filename_sig);
if( (fp=fopen(absolute_filename, "r")) == NULL) {
printf("\nCannot open file \"%s\"\n", absolute_filename);
perror(NULL);
exit(1);
printf("\nCannot open file \"%s\"\n", absolute_filename);
perror(NULL);
exit(1);
}
/** Check filesize for content allocation
@ -76,81 +85,79 @@ struct BayesSignature * new_Signature_s(const char * absolute_path, char * filen
rewind(fp);
while(!feof(fp))
if(fgets(line, MAX_TOKENSIZE-1, fp)){
lines[i] = (char *)malloc(MAX_TOKENSIZE*sizeof(char));
strcpy(lines[i], line);
i++;
}
if(fgets(line, MAX_TOKENSIZE-1, fp)){
lines[i] = (char *)malloc(MAX_TOKENSIZE*sizeof(char));
strcpy(lines[i], line);
i++;
}
fclose(fp);
fclose(fp);
/** Read the specific supports, token and threshold out of the file */
/** Read the specific supports, token and threshold out of the file */
double threshold = 0.0,
support = 0.0;
support = 0.0;
char temp_token[MAX_TOKENSIZE] = "";
char temp_id[ID_STRING_SIZE] = "";
int get_signatureid = 0, get_classmembers = 0,
get_token = 0, get_support = 0, get_threshold = 0,
token_length = 0;
get_token = 0, get_support = 0, get_threshold = 0,
token_length = 0;
struct Token * token;
struct BayesSignature * signature;
signature = new_Signature_i(numOfTokens, filename_sig);
for (i = 0; i < numOfLines; i++) {
if(get_signatureid == 1){
strcpy(temp_id, lines[i]);
temp_id[ID_STRING_SIZE] = 0;
strcpy(signature->id, temp_id);
//printf("%i:*%s*\n", i, lines[i]);
/* First, check if we have a keyword */
if(strncmp(lines[i], "SIGNATUREID", 11) == 0) {
get_classmembers = get_token = get_support = get_threshold = 0;
get_signatureid = 1;
} else if(strncmp(lines[i], "CLASSMEMBERS", 12) == 0) {
get_signatureid = get_token = get_support = get_threshold = 0;
get_classmembers = 1;
} else if(strncmp(lines[i], "TOKEN", 5) == 0) {
get_signatureid = get_classmembers = get_support = get_threshold = 0;
get_token = 1;
} else if(strncmp(lines[i], "SUPPORT", 7) == 0) {
get_signatureid = get_classmembers = get_token = get_threshold = 0;
get_support = 1;
} else if(strncmp(lines[i], "THRESHOLD", 9) == 0) {
get_signatureid = get_classmembers = get_token = get_support = 0;
get_threshold = 1;
}
/* If not a new keyword, process value of last keyword */
else if((get_signatureid == 1) && (strlen(removeCRLF(lines[i])) > 0)){
strncpy(signature->id, lines[i], ID_STRING_SIZE);
signature->id[ID_STRING_SIZE-1] = '\0';
get_signatureid = 0;
}
if(get_classmembers == 1 && !strncmp(lines[i], "TOKEN", 5) == 0){
} else if((get_classmembers == 1) && (strlen(removeCRLF(lines[i])) > 0)){
/* Read the next classmemberID */
/* Here, we accept multiple lines */
add_classid_to_signatureclass(signature->classinformation, lines[i]);
}
if(get_token == 1 && !strncmp(lines[i], "SUPPORT", 7) == 0){
int j = 0;
while(lines[i][j] != 0){
token_length++;
j++;
} else if(get_token == 1){
/* Here, we keep CRLF and accept multiple lines */
if((token_length + strlen(lines[i])) < MAX_TOKENSIZE) {
token_length += strlen(lines[i]);
strcat(temp_token, lines[i]);
}
strcat(temp_token,lines[i]);
}
if(get_support == 1){
} else if((get_support == 1) && (strlen(removeCRLF(lines[i])) > 0)){
support = get_double(lines[i]);
/* Remove last \n of temp_token */
temp_token[token_length-1] = 0;
removeCRLF(temp_token);
token = new_Token(temp_token, support, token_length-1);
add_Token(signature, token);
temp_token[0] = 0;
token_length = 0;
get_support = 0;
}
if(get_threshold == 1)
} else if((get_threshold == 1) && (strlen(removeCRLF(lines[i])) > 0)){
threshold = get_double(lines[i]);
if(strncmp(lines[i], "SIGNATUREID", 11) == 0)
get_signatureid = 1;
if(strncmp(lines[i], "CLASSMEMBERS", 12) == 0)
get_classmembers = 1;
if(strncmp(lines[i], "TOKEN", 5) == 0){
get_token = 1;
get_classmembers = 0;
get_threshold = 0;
}
if(strncmp(lines[i], "SUPPORT", 7) == 0){
get_support = 1;
get_token = 0;
/* Check for consistency */
if(get_signatureid + get_classmembers + get_support + get_threshold + get_token > 1) {
printf("\nMore than more get_* is 1, this should never happen\n");
exit(1);
}
if(strncmp(lines[i], "THRESHOLD", 9) == 0)
get_threshold = 1;
}
signature->threshold = threshold;
@ -180,14 +187,12 @@ void destruct_signatureclass(struct SignatureClass * class){
}
void add_classid_to_signatureclass(struct SignatureClass * class, char * new_memberID){
// Gerhard: It seems that this function does not do what it is supposed to.
// I think that a class id is not added to a signature, but a signature id to a class.
class->member_IDs = realloc(class->member_IDs, sizeof(char *) * (class->numOfMembers + 1));
class->member_IDs[class->numOfMembers] = malloc(sizeof(char) * ID_STRING_SIZE);
new_memberID[ID_STRING_SIZE] = 0;
int id_length = 0;
while(new_memberID[id_length] != 0) id_length++;
new_memberID[id_length-1] = 0;
strcpy(class->member_IDs[class->numOfMembers], new_memberID);
strncpy(class->member_IDs[class->numOfMembers], new_memberID, ID_STRING_SIZE);
class->member_IDs[class->numOfMembers][ID_STRING_SIZE-1] = '\0';
class->numOfMembers++;
}
@ -239,27 +244,27 @@ char* token_to_string(struct Token *token){
/* Returned char* needs to be freed.
* length is an in/out-Parameter */
char * deescape_string(char *s, int *length){
char *t = malloc(sizeof(char)*MAX_TOKENSIZE);
char buf[5] = "0x00";
unsigned int j, i, k;
char *t = malloc(sizeof(char)*MAX_TOKENSIZE);
char buf[5] = "0x00";
unsigned int j, i, k;
for (k = 0, i = 0; i < strlen(s); i++) {
if (s[i] != '%') {
t[k++] = s[i];
continue;
}
for (k = 0, i = 0; i < strlen(s); i++) {
if (s[i] != '%') {
t[k++] = s[i];
continue;
}
/* Skip truncated sequence */
if (strlen(s) - i < 2)
break;
/* Skip truncated sequence */
if (strlen(s) - i < 2)
break;
buf[2] = s[++i];
buf[3] = s[++i];
sscanf(buf, "%x", (unsigned int *) &j);
t[k++] = (char) j;
}
*length = k;
return t;
buf[2] = s[++i];
buf[3] = s[++i];
sscanf(buf, "%x", (unsigned int *) &j);
t[k++] = (char) j;
}
*length = k;
return t;
}
void print_signature(struct BayesSignature * signature){
int i = 0;

View File

@ -60,6 +60,7 @@ struct BayesSignature {
* The most representative signature of those members can be
* addressed via the representative, which behaves as an index
* to the member_ids */
// Gerhard: If the idea is that multiple signatures should belong to one class, this is not how it is implemented!
struct SignatureClass {
int representative;
char ** member_IDs;

View File

@ -23,6 +23,7 @@
#include <stdio.h>
#include <sys/types.h>
#include <dirent.h>
#include <sys/stat.h>
#include <errno.h>
#include <stdlib.h>
@ -37,7 +38,7 @@ double get_double(char string[]){
ret = atof(string);
}else{
int pre_comma_i = -1.0,
post_comma_i = -1.0;
post_comma_i = -1.0;
char * tokens[2];
char separator[] = ",. ";
strtoken(string, separator, tokens);
@ -57,7 +58,7 @@ double get_double(char string[]){
}
double pre_comma_d = (double)pre_comma_i,
post_comma_d = (double)post_comma_i;
post_comma_d = (double)post_comma_i;
// Check the number of deleted 0's in post_comma
while(sizeOfToken1 > 1){
@ -71,13 +72,13 @@ double get_double(char string[]){
}
int strtoken(char *str, char *separator, char *tokens[]){
int i = 0;
tokens[0] = strtok(str, separator);
while ( tokens[i] ) {
i++;
tokens[i] = strtok(NULL, separator);
}
return i;
int i = 0;
tokens[0] = strtok(str, separator);
while ( tokens[i] ) {
i++;
tokens[i] = strtok(NULL, separator);
}
return i;
}
int get_length(char * string){
@ -88,59 +89,71 @@ int get_length(char * string){
char ** get_filenames(const char * directory, int * num_of_files) {
int i = 0;
DIR *pDIR;
struct dirent *pDirEnt;
DIR *pDIR;
struct dirent *pDirEnt;
struct stat dirEntStat;
char filename[256];
/* Open the current directory */
/* Open the current directory */
pDIR = opendir(directory);
pDIR = opendir(directory);
if ( pDIR == NULL ) {
fprintf( stderr, "%s %d: opendir() failed (%s)\n",
__FILE__, __LINE__, strerror( errno ));
exit( -1 );
}
if ( pDIR == NULL ) {
fprintf( stderr, "%s %d: opendir() failed (%s)\n",
__FILE__, __LINE__, strerror( errno ));
exit( -1 );
}
/* Count filenames */
int filecounter = 0;
pDirEnt = readdir( pDIR );
while ( pDirEnt != NULL ) {
if( (strcmp(pDirEnt->d_name, ".") != 0 ) && (strcmp(pDirEnt->d_name, "..") != 0 )){
//printf( "FOUND: %s\n", pDirEnt->d_name );
filecounter++;
}
pDirEnt = readdir( pDIR );
}
*num_of_files = filecounter;
//printf("\n%i relevant files found.\n", filecounter);
/* Get each directory entry from pDIR and store its name
* in an array */
char ** filenames;
filenames = (char **)malloc(sizeof(char*)*filecounter);
for (i = 0; i < filecounter; ++i)
filenames[i] = (char*)malloc(sizeof(char)*MAX_SIZE_OF_FILENAME);
// Rewind??
rewinddir(pDIR);
i = 0;
pDirEnt = readdir( pDIR );
/* Count filenames */
int filecounter = 0;
pDirEnt = readdir( pDIR );
while ( pDirEnt != NULL ) {
//printf( "%s\n", pDirEnt->d_name );
if( (strcmp(pDirEnt->d_name, ".") != 0 ) && (strcmp(pDirEnt->d_name, "..") != 0 )){
snprintf(filenames[i], MAX_SIZE_OF_FILENAME-1, "%s", pDirEnt->d_name);
//printf( "ADDED: %s\n", filenames[i] );
i++;
/* Skip hidden entries starting with dot */
if( pDirEnt->d_name[0] != '.' ) {
/* Skip non-regular files (e.g. directories) */
if ( (snprintf(filename, sizeof(filename), "%s/%s", directory, pDirEnt->d_name) < sizeof(filename))
&& (stat(filename, &dirEntStat) == 0) && (S_ISREG(dirEntStat.st_mode)) ) {
//printf( "FOUND: %s\n", pDirEnt->d_name );
filecounter++;
}
}
pDirEnt = readdir( pDIR );
}
/* Release the open directory */
closedir( pDIR );
*num_of_files = filecounter;
return filenames;
//printf("\n%i relevant files found.\n", filecounter);
/* Get each directory entry from pDIR and store its name
* in an array */
char ** filenames;
filenames = (char **)malloc(sizeof(char*)*filecounter);
for (i = 0; i < filecounter; ++i)
filenames[i] = (char*)malloc(sizeof(char)*MAX_SIZE_OF_FILENAME);
// Rewind??
rewinddir(pDIR);
i = 0;
pDirEnt = readdir( pDIR );
while ( pDirEnt != NULL ) {
//printf( "%s\n", pDirEnt->d_name );
/* Skip hidden entries starting with dot */
if( pDirEnt->d_name[0] != '.' ) {
/* Skip non-regular files (e.g. directories) */
if ( (snprintf(filename, sizeof(filename), "%s/%s", directory, pDirEnt->d_name) < sizeof(filename))
&& (stat(filename, &dirEntStat) == 0) && (S_ISREG(dirEntStat.st_mode)) ) {
snprintf(filenames[i], MAX_SIZE_OF_FILENAME-1, "%s", pDirEnt->d_name);
//printf( "ADDED: %s\n", filenames[i] );
i++;
}
}
pDirEnt = readdir( pDIR );
}
/* Release the open directory */
closedir( pDIR );
return filenames;
}