what you don't know can hurt you
Home Files News Services About Contact Add New

docanal.c

docanal.c
Posted Sep 11, 2001
Authored by hhp, Tarsin | Site hhp-programming.net

A document analysis tool that is useful for cryptanalysis. From May '98.

SHA-256 | 8024662cda4203693c125747c6a763cff1085e773ab5485c9fe0b23f1c2232b7

docanal.c

Change Mirror Download
/******************************************************************* hhp *****
*** Author: Rob Gubler -- tarsin@happy.digitaldune.net ***
*** Date: 1998.05.03.r00 ***
*** Site: http://www.hhp-programming.net/ ***
*** Description: Document Analysis ***
*** Comments: Really only useful for cryptanalysis ***
*** -s parameter is very CPU intensive; it's best to break ***
*** up your document into smaller pieces if you want to use ***
*** the string analysis ***
*****************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * One record of the global `list` array.  All three members are plain
 * char pointers; nothing in this file reads or writes them individually,
 * so their exact contents are presumably the textual form of the statistic
 * named by each member -- TODO confirm against whoever fills them in.
 */
struct char_info_s {
    char *occurrence_str;   /* presumably the occurrence count as text */
    char *rank_str;         /* presumably the ranking as text */
    char *percentage_str;   /* presumably the percentage as text */
};

typedef struct char_info_s char_info;

/*
 * Forward declarations for the helpers defined further down in this file.
 * Each prototype is kept on logically complete lines: a declaration that is
 * split in the middle of a keyword or identifier does not compile.
 */
int add_to_list(char_info *info);
unsigned long get_doc_len(FILE *fi);
void print_ch_occurrence(unsigned long *ascii_val, unsigned short sz);
void print_ch_used_most(unsigned long *ascii_val, unsigned short sz);
void print_ch_percentage_used(unsigned long *ascii_val, unsigned short sz,
                              unsigned long total_bytes);
void print_str_occurrence(char *buffer, unsigned long buff_size);
void print_stars(int ch, unsigned long occrrence, unsigned short star_num,
                 float percent);
int str_mid(unsigned long start, unsigned long count, char *buffer,
            unsigned long buffer_size, char *mid_str,
            unsigned long mid_str_size);
unsigned long src_buffer_for_str_occurrence(char *buffer,
                                            unsigned long buffer_size,
                                            char *src_str,
                                            unsigned long src_str_len);

/* Heap array of char_info records; add_to_list() appends to it. */
char_info *list = NULL;

/*
 * list_size is bumped by add_to_list() for every record added;
 * list_array_size is the number of slots currently allocated for `list`
 * (add_to_list() enlarges it 12 slots at a time).
 */
unsigned short list_size = 0, list_array_size = 0;

/*
 * Program entry point.
 *
 * Usage: <program> <file> [-s]
 *
 * Reads the file named by argv[1], counts how often every byte value occurs
 * and prints three reports (occurrence, ranking, percentage).  When "-s" is
 * present among the arguments the repeated-substring report is printed too.
 *
 * Returns 0 on success, 1 when no file was given, the file cannot be opened
 * or the file buffer cannot be allocated.
 */
int main(int argc, char *argv[]) {
    unsigned long ascii_val[512] = {0};
    unsigned long total_bytes = 0;
    unsigned long res;
    char *file_buffer;
    int string_search = 0;
    int arg;
    int ch;
    FILE *fi;

    if(argc < 2) {
        printf("\nNo file specified.\nUse the '-s' option if you want ");
        printf("string analysis as well.\n");
        return 1;
    }

    for(arg = 1; arg < argc; arg++) {
        if(strcmp(argv[arg], "-s") == 0)
            string_search = 1;
    }

    /* fopen() reports failure with a null pointer, not a character. */
    if((fi = fopen(argv[1], "rt")) == NULL) {
        printf("Can't open \"%s\"", argv[1]);
        return 1;
    }

    res = get_doc_len(fi);
    file_buffer = calloc(1, res + 1);
    if(file_buffer == NULL) {
        printf("Can't allocate a buffer for \"%s\"", argv[1]);
        fclose(fi);
        return 1;
    }

    /*
     * fgetc() must be stored in an int so EOF stays distinguishable from
     * data.  Reading stops at a NUL byte because the buffer is later handled
     * as a C string, and never goes past the `res` bytes that were allocated
     * (the extra byte keeps the buffer NUL-terminated).
     */
    while(total_bytes < res && (ch = fgetc(fi)) != EOF && ch != '\0') {
        ascii_val[ch] += 1;
        file_buffer[total_bytes] = (char)ch;
        total_bytes += 1;
    }
    fclose(fi);

    printf("\n TEXT ANALYSIS \n_______________");
    printf("\n\nCHARACTER OCCURRENCE (character type, it's value, and how "
           "many time it occurred)");
    print_ch_occurrence(ascii_val, 512);
    printf("\n\nCHARACTER RANKING (comparison between the characters)");
    print_ch_used_most(ascii_val, 512);
    printf("\n\nCHARACTER PERCENTAGE (%% of the characters used bases on "
           "total document length)");
    print_ch_percentage_used(ascii_val, 512, total_bytes);
    if(string_search) {
        printf("\n\nSTRING OCCURRENCE (checks for file for reoccurring strings)");
        /* Analyse only the bytes that were actually stored. */
        print_str_occurrence(file_buffer, total_bytes);
    }

    free(file_buffer);
    printf("\n");
    return 0;
}

/*
 * Reports the length of an open stream and leaves it rewound to the start.
 *
 * The position is parked one byte beyond the end of the stream, so the
 * offset reported by ftell() is one more than the length; that extra byte
 * is subtracted again before returning.  No error checking is performed on
 * the seek or tell calls.
 */
unsigned long get_doc_len(FILE *fi) {
    long past_end;

    fseek(fi, 1, SEEK_END);
    past_end = ftell(fi);
    rewind(fi);

    return (unsigned long)(past_end - 1);
}

/*
 * Prints every substring of `buffer` that occurs more than once.
 *
 * Candidates start at positions 0 .. buff_size/2 and have lengths
 * 2 .. buff_size/2.  For each candidate that str_mid() can extract, the
 * whole buffer is searched with src_buffer_for_str_occurrence(); a count
 * above 1 is reported together with the 1-based position and the length.
 *
 * buffer    - text to analyse
 * buff_size - number of bytes of `buffer` to consider
 */
void print_str_occurrence(char *buffer, unsigned long buff_size) {
    unsigned long half = buff_size / 2;
    unsigned long i;
    unsigned long n;
    unsigned long x;
    char *src_str;

    /* With fewer than two candidate lengths there is nothing to search. */
    if(buffer == NULL || half < 2)
        return;

    /* One scratch buffer, large enough for the longest candidate + NUL. */
    src_str = calloc(1, half + 1);
    if(src_str == NULL) {
        fprintf(stderr, "\nOut of memory during string analysis\n");
        return;
    }

    for(i = 0; i <= half; i++) {
        for(n = 2; n <= half; n++) {
            /* Once a length no longer fits, longer ones cannot fit either. */
            if(!str_mid(i, n, buffer, buff_size, src_str, n + 1))
                break;

            /* The scratch buffer is reused, so terminate it explicitly. */
            src_str[n] = '\0';

            x = src_buffer_for_str_occurrence(buffer, buff_size, src_str, n);
            if(x > 1)
                printf("\n%s\npos: %lu len: %lu occurrence: %lu\n---",
                       src_str, i + 1, n, x);
        }
    }

    free(src_str);
}

/*
 * Appends a copy of *info to the global `list` array.
 *
 * When the array is full it is enlarged by 12 zero-initialised slots and
 * the existing records are carried over.  The caller keeps ownership of
 * `info`; only the three pointers it holds are copied.
 *
 * Returns 0 on success and -1 when `info` is NULL, the capacity counter
 * would wrap, or memory cannot be allocated (the list is left unchanged
 * in all failure cases).
 */
int add_to_list(char_info *info) {
    enum { LIST_GROWTH = 12 };

    if(info == NULL)
        return -1;

    if(list_size == list_array_size) {
        unsigned short new_cap = (unsigned short)(list_array_size + LIST_GROWTH);
        unsigned short k;
        char_info *grown;

        /* The capacity is an unsigned short; refuse to wrap it around. */
        if(new_cap < list_array_size)
            return -1;

        grown = calloc(new_cap, sizeof *grown);
        if(grown == NULL)
            return -1;

        for(k = 0; k < list_size; k++)
            grown[k] = list[k];

        /* Release the OLD array, never the caller's record. */
        free(list);
        list = grown;
        list_array_size = new_cap;
    }

    /* Store first, then count: slot 0 is used and the last slot is in range. */
    list[list_size] = *info;
    list_size += 1;

    return 0;
}

/*
 * Prints one line for every character value whose count is non-zero: the
 * character itself (or its escape sequence for \n \t \f \r \a \b), its
 * decimal and hexadecimal value, and how often it occurred.
 *
 * ascii_val - table of counts indexed by character value
 * sz        - number of entries in ascii_val
 */
void print_ch_occurrence(unsigned long *ascii_val, unsigned short sz) {
    unsigned short i;

    for(i = 0; i < sz; i++) {
        const char *escape = NULL;

        if(ascii_val[i] == 0)
            continue;

        switch(i) {
        case '\n': escape = "\\n"; break;
        case '\t': escape = "\\t"; break;
        case '\f': escape = "\\f"; break;
        case '\r': escape = "\\r"; break;
        case '\a': escape = "\\a"; break;
        case '\b': escape = "\\b"; break;
        default:   break;
        }

        /* The counts are unsigned long, so they need %lu rather than %d. */
        if(escape != NULL)
            printf("\nch = (ascii: '%s', dec: '%3d', hex: '%2x'). occurrence = %lu",
                   escape, (int)i, (unsigned)i, ascii_val[i]);
        else
            printf("\nch = (ascii: '%2c', dec: '%3d', hex: '%2x'). occurrence = %lu",
                   (int)i, (int)i, (unsigned)i, ascii_val[i]);
    }
}

/*
 * Prints a bar for every character with a non-zero count, scaled against
 * the most frequent character.
 *
 * The highest count is divided into 50 thresholds of 2% each; a character
 * gets as many stars as the index of the first threshold its count does not
 * exceed.  Thresholds are computed with exact integer arithmetic so the
 * last one always equals the highest count and every character finds a
 * bucket inside the array.
 *
 * ascii_val - table of counts indexed by character value
 * sz        - number of entries in ascii_val
 */
void print_ch_used_most(unsigned long *ascii_val, unsigned short sz) {
    enum { BUCKETS = 50 };
    unsigned long highest_count = 0;
    unsigned long high_parts[BUCKETS];
    unsigned short i;
    unsigned short n;

    for(i = 0; i < sz; i++) {
        if(ascii_val[i] > highest_count)
            highest_count = ascii_val[i];
    }

    /* floor(highest_count * (i + 1) / BUCKETS), split to avoid overflow. */
    for(i = 0; i < BUCKETS; i++) {
        high_parts[i] = highest_count / BUCKETS * (i + 1UL)
                      + highest_count % BUCKETS * (i + 1UL) / BUCKETS;
    }

    for(i = 0; i < sz; i++) {
        if(ascii_val[i] == 0)
            continue;

        for(n = 0; n < BUCKETS; n++) {
            if(ascii_val[i] <= high_parts[n]) {
                print_stars(i, ascii_val[i], n, 0.0f);
                break;
            }
        }
    }
}

/*
 * Prints a bar for every character with a non-zero count showing its share
 * of the document: the percentage is passed to print_stars() both as the
 * number of stars (truncated to a whole number) and as the value to print.
 *
 * ascii_val   - table of counts indexed by character value
 * sz          - number of entries in ascii_val
 * total_bytes - number of bytes the counts were taken from; when it is 0
 *               no percentage can be formed and nothing is printed
 */
void print_ch_percentage_used(unsigned long *ascii_val, unsigned short sz,
                              unsigned long total_bytes) {
    unsigned short i;

    /* Avoid dividing by zero and converting the resulting inf/NaN. */
    if(total_bytes == 0)
        return;

    for(i = 0; i < sz; i++) {
        double occrrence;
        float percent;

        if(ascii_val[i] == 0)
            continue;

        occrrence = ascii_val[i];
        percent = (float)(occrrence / total_bytes) * 100;
        print_stars(i, ascii_val[i], (unsigned short)percent, percent);
    }
}

/*
 * Prints one report row on a new line: the character (or its escape
 * sequence for \n \t \f \r \a \b), its count in parentheses, optionally a
 * percentage, and a bar of `star_num` asterisks.
 *
 * ch        - character value the row describes
 * occrrence - how often the character occurred
 * star_num  - length of the bar
 * percent   - printed with two decimals when non-zero, omitted when zero
 */
void print_stars(int ch, unsigned long occrrence, unsigned short star_num,
                 float percent) {
    const char *escape = NULL;
    int n;

    switch(ch) {
    case '\n': escape = "\\n"; break;
    case '\t': escape = "\\t"; break;
    case '\f': escape = "\\f"; break;
    case '\r': escape = "\\r"; break;
    case '\a': escape = "\\a"; break;
    case '\b': escape = "\\b"; break;
    default:   break;
    }

    printf("\n");
    /* The count is an unsigned long, so it needs %lu rather than %d. */
    if(escape != NULL)
        printf("'%s' (%6lu) | ", escape, occrrence);
    else
        printf("'%2c' (%6lu) | ", ch, occrrence);

    if(percent != 0)
        printf("%.2f%% ", percent);

    for(n = 0; n < star_num; n++)
        printf("*");
}


/*
 * Copies `count` bytes of `buffer`, starting at offset `start`, into
 * `mid_str` and NUL-terminates the copy.
 *
 * start        - offset of the first byte to copy
 * count        - number of bytes to copy
 * buffer       - source data
 * buffer_size  - number of valid bytes in `buffer`
 * mid_str      - destination
 * mid_str_size - capacity of `mid_str`; must exceed `count` so the
 *                terminator fits
 *
 * Returns 1 on success.  Returns 0, leaving `mid_str` untouched, when a
 * pointer is NULL, `start` lies outside the buffer, the requested range
 * runs past the end of the buffer, or the destination is too small.  A
 * range that ends exactly at the end of the buffer is valid.
 */
int str_mid(unsigned long start, unsigned long count, char *buffer,
            unsigned long buffer_size, char *mid_str,
            unsigned long mid_str_size) {
    unsigned long i;

    if(buffer == NULL || mid_str == NULL)
        return 0;

    /* `count > buffer_size - start` cannot overflow, unlike start + count. */
    if(start >= buffer_size || count > buffer_size - start ||
       count >= mid_str_size)
        return 0;

    for(i = 0; i < count; i++)
        mid_str[i] = buffer[start + i];
    mid_str[count] = '\0';

    return 1;
}

/*
 * Counts how many times `src_str` occurs in `buffer`, overlapping matches
 * included.
 *
 * Every start position at which `src_str_len` bytes still fit inside the
 * buffer is compared in place, so no temporary copies are allocated and an
 * occurrence that ends exactly at the end of the buffer is counted too.
 * The comparison has C-string semantics: it stops early at a NUL byte.
 *
 * buffer      - text to search
 * buffer_size - number of valid bytes in `buffer`
 * src_str     - string to look for
 * src_str_len - length of `src_str`
 *
 * Returns the number of matching positions, or 0 when a pointer is NULL or
 * the search string is longer than the buffer.
 */
unsigned long src_buffer_for_str_occurrence(char *buffer,
                                            unsigned long buffer_size,
                                            char *src_str,
                                            unsigned long src_str_len) {
    unsigned long str_occurrence = 0;
    unsigned long i;

    if(buffer == NULL || src_str == NULL)
        return 0;

    for(i = 0; i < buffer_size && src_str_len <= buffer_size - i; i++) {
        if(strncmp(buffer + i, src_str, src_str_len) == 0)
            str_occurrence += 1;
    }

    return str_occurrence;
}


Login or Register to add favorites

File Archive:

April 2024

  • Su
  • Mo
  • Tu
  • We
  • Th
  • Fr
  • Sa
  • 1
    Apr 1st
    10 Files
  • 2
    Apr 2nd
    26 Files
  • 3
    Apr 3rd
    40 Files
  • 4
    Apr 4th
    6 Files
  • 5
    Apr 5th
    26 Files
  • 6
    Apr 6th
    0 Files
  • 7
    Apr 7th
    0 Files
  • 8
    Apr 8th
    22 Files
  • 9
    Apr 9th
    14 Files
  • 10
    Apr 10th
    10 Files
  • 11
    Apr 11th
    13 Files
  • 12
    Apr 12th
    14 Files
  • 13
    Apr 13th
    0 Files
  • 14
    Apr 14th
    0 Files
  • 15
    Apr 15th
    30 Files
  • 16
    Apr 16th
    10 Files
  • 17
    Apr 17th
    22 Files
  • 18
    Apr 18th
    45 Files
  • 19
    Apr 19th
    0 Files
  • 20
    Apr 20th
    0 Files
  • 21
    Apr 21st
    0 Files
  • 22
    Apr 22nd
    0 Files
  • 23
    Apr 23rd
    0 Files
  • 24
    Apr 24th
    0 Files
  • 25
    Apr 25th
    0 Files
  • 26
    Apr 26th
    0 Files
  • 27
    Apr 27th
    0 Files
  • 28
    Apr 28th
    0 Files
  • 29
    Apr 29th
    0 Files
  • 30
    Apr 30th
    0 Files

Top Authors In Last 30 Days

File Tags

Systems

packet storm

© 2022 Packet Storm. All rights reserved.

Services
Security Services
Hosting By
Rokasec
close