更新时间:2023-02-27 14:48:09
问题可能在这里:char data[MAX_BUFFER_SIZE];
fileOpen.read(&data[0], size);
当文件大小大于 MAX_BUFFER_SIZE 时,这会导致缓冲区溢出。
为避免这种情况,请使用 new
或 malloc
在堆上分配缓冲区:char* data = new char[size];
fileOpen.read(&data[0], size);
// 在此处处理数据
delete[] data;
Hi all,
I want to extract all 3-grams (each 3-gram contains 3 bytes, with the window shifted by 1 byte each time) from the files in a directory and count the frequency of each 3-gram across those files. I have written a simple C++ program that recursively extracts the 3-grams of binary files and saves them as keys in a hash table.
Before I add a key, I look it up first. If it is already in the hash table, I do not add it again and just increase its member value (the frequency of that 3-gram).
The program runs, but it stops with an error message saying "Windows has triggered a breakpoint in my program. This may be due to a corruption of the heap, which indicates a bug in the program or any of the DLLs it has loaded."
I would appreciate it if somebody could help me.
Thanks,
#include "hash_table.h"
#include <string>
#include <windows.h>
#include <fstream>
#include <stdio.h>
#include <iostream>
// Fixed buffer capacity, in bytes.
#define MAX_BUFFER_SIZE 256
// Hash table whose stored values are ints; makeVocabHash keeps one occurrence
// count per n-gram key in it. (CHashTable is presumably declared in
// hash_table.h -- confirm.)
typedef CHashTable<int> CLongHashT;
// Makes every std name visible unqualified for the rest of this file.
using namespace std;
void makeVocabHash(string dir, CLongHashT HashTperAll, int N) {
/* N -> N-gram! */
HANDLE hFindFile;
WIN32_FIND_DATAA Win32FindData;
CHAR Directory[MAX_PATH];
int counter;
int countNgram;
int i;
string tmp;
fstream fileOpen;
// copying path to directory
sprintf(Directory,"%s\\*.*", &dir[0]);
if((hFindFile = FindFirstFileA(Directory, &Win32FindData)) == INVALID_HANDLE_VALUE){ // if directory not found (finding first file of directory)
return ; // error, directory not found
}
do{
if(strcmp(Win32FindData.cFileName, ".") != 0 && strcmp(Win32FindData.cFileName, "..") != 0){
sprintf(Directory, "%s\\%s", &dir[0], Win32FindData.cFileName);
// if found a file
if(! (Win32FindData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) ) {
// is a file
fileOpen.open(Directory, ios::in | ios::binary | ios::ate);
//size of file
int end = fileOpen.tellg();
fileOpen.seekg (0, ios::beg);
int begin = fileOpen.tellg();
int size = end - begin;
char data[MAX_BUFFER_SIZE];
fileOpen.read(&data[0], size);
fileOpen.close();
counter = 0;
// reading data with N bytes, construct 3 grams and insert to hashT
while( (counter != ((size - N) + 1) ) ) {
for( i=0; i!=N; ++i)
tmp += data[i+counter];
// insert to hashT
if (HashTperAll.GetMember(tmp))
countNgram = *(HashTperAll.GetMember(tmp)) + 1;
else
countNgram = 1;
HashTperAll.AddKey(tmp, &countNgram );
tmp = "";
counter++;
}
}
else {
// is a directory
makeVocabHash(Directory, HashTperAll, N);
}
}
} while(FindNextFileA(hFindFile,&Win32FindData));//finding next file in directory
// closing handles
FindClose(hFindFile);
}
//---------------------------------------------------------------------
/**
 * Program entry point: asks for a directory path on standard input and
 * counts the 3-grams of every file below it via makeVocabHash.
 *
 * @return 0 on success, 1 if no path could be read from standard input.
 */
int main()
{
    CLongHashT HashTDocs;

    cout << "enter a path";

    // NOTE: operator>> reads a single whitespace-delimited token, so a path
    // containing spaces is truncated at the first space.
    string dir;
    if (!(cin >> dir)) {
        return 1;  // nothing to scan without a path
    }

    makeVocabHash(dir, HashTDocs, 3);
    return 0;
}
The problem may be located here: char data[MAX_BUFFER_SIZE]; fileOpen.read(&data[0], size);
This results in a buffer overflow when your file size is greater than MAX_BUFFER_SIZE.
To avoid this, allocate the buffer on the heap using new
or malloc
: char* data = new char[size]; fileOpen.read(&data[0], size); // Do something with data here
delete[] data;