且构网

分享程序员开发的那些事...
且构网 - 分享程序员编程开发的那些事

递归函数遍历目录时未检测到所有文件夹

更新时间:2023-02-26 13:26:59

需要判断当前目录是否为空:如果为空,则将该目录放入 files 向量中;否则进入该目录继续查找。

You need to determine whether the current directory is empty. If it is empty, put the directory itself into the `files` vector; if not, enter the directory and continue searching.

// Iteratively walks the directory tree rooted at `path` using an explicit
// stack instead of recursion.
//
// path  - directory to start from (no trailing backslash is added or removed).
// mask  - wildcard appended as `path\mask` for every directory visited. Only
//         entries returned by that enumeration are seen, so a subdirectory is
//         descended into only if the enumeration returns it.
// files - cleared on entry; receives the full path of every non-directory
//         entry found, plus the path of every visited directory whose
//         enumeration returned nothing except "." and "..".
//
// Returns true when every visited directory was enumerated to the end.
// Returns false as soon as FindFirstFile fails for a directory or
// FindNextFile stops for a reason other than ERROR_NO_MORE_FILES; `files`
// then holds whatever was collected up to that point.
//
// NOTE(review): concatenating ffd.cFileName onto a wstring presumes the wide
// (UNICODE) variants of the Win32 find API are in effect - confirm build flags.
bool crawldirs(wstring path, wstring mask, vector<wstring>& files) {
    stack<wstring> directories;
    directories.push(path);
    files.clear();

    while (!directories.empty()) {
        path = directories.top();
        directories.pop();
        const wstring spec = path + L"\\" + mask;

        WIN32_FIND_DATA ffd;
        HANDLE hFind = FindFirstFile(spec.c_str(), &ffd);
        if (hFind == INVALID_HANDLE_VALUE) {
            return false;
        }

        bool isEmpty = true;  // stays true only if no real entry is seen
        do {
            // Skip the self and parent pseudo-entries.
            if (wcscmp(ffd.cFileName, L".") == 0 ||
                wcscmp(ffd.cFileName, L"..") == 0) {
                continue;
            }

            // Any real entry - file OR subdirectory - makes this directory
            // non-empty.
            isEmpty = false;

            if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
                directories.push(path + L"\\" + ffd.cFileName);
            }
            else {
                files.push_back(path + L"\\" + ffd.cFileName);
            }
        } while (FindNextFile(hFind, &ffd) != 0);

        // Read the error code immediately: later calls (container growth,
        // FindClose) may overwrite the thread's last-error value.
        const auto enumerationError = GetLastError();
        FindClose(hFind);

        if (enumerationError != ERROR_NO_MORE_FILES) {
            return false;
        }

        if (isEmpty) {
            files.push_back(path);
        }
    }

    return true;
}

或者不使用 while 循环:使用递归调用来列出所有目录、子目录和文件会更清晰。

Alternatively, instead of the while loop, it is clearer to use a recursive call to list all directories, subdirectories, and files.

// Recursively lists the contents of the directory `szDir` on standard output.
//
// For every non-directory entry it prints "<full path>   <size> bytes".
// For every subdirectory it recurses. If the directory contains no entries
// other than "." and "..", it prints "<szDir>   [DIR]".
//
// szDir - NUL-terminated directory path. It is only read; the search pattern
//         and child paths are built in local buffers.
//
// Returns false if `szDir` is too long to form the search pattern within
// MAX_PATH characters or if FindFirstFile fails; true otherwise. Failures in
// subdirectories are not propagated - the listing continues with the
// remaining entries.
bool RetrieveFile(TCHAR* szDir)
{
    const size_t dirLen = _tcslen(szDir);

    // The pattern is szDir + "\\*" + NUL; refuse anything that cannot fit
    // rather than overrunning the buffer.
    if (dirLen + 3 > MAX_PATH) {
        return false;
    }
    TCHAR szPattern[MAX_PATH] = { 0 };
    _tcscpy(szPattern, szDir);
    _tcscat(szPattern, _T("\\*"));

    WIN32_FIND_DATA ffd;
    HANDLE hFind = FindFirstFile(szPattern, &ffd);
    if (hFind == INVALID_HANDLE_VALUE) {
        return false;
    }

    bool isEmpty = true;  // stays true only if no real entry is seen
    do
    {
        // Skip the self and parent pseudo-entries.
        if (!_tcscmp(ffd.cFileName, _T(".")) || !_tcscmp(ffd.cFileName, _T("..")))
            continue;

        // Any real entry - file OR subdirectory - makes this directory
        // non-empty.
        isEmpty = false;

        // Child path is szDir + '\\' + name + NUL; skip entries whose full
        // path would not fit in MAX_PATH characters.
        if (dirLen + 1 + _tcslen(ffd.cFileName) + 1 > MAX_PATH)
            continue;
        TCHAR szChild[MAX_PATH] = { 0 };
        _tcscpy(szChild, szDir);
        _tcscat(szChild, _T("\\"));
        _tcscat(szChild, ffd.cFileName);

        if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
        {
            // Best effort: a subdirectory that cannot be listed does not
            // abort the listing of its siblings.
            RetrieveFile(szChild);
        }
        else
        {
            // Combine the two 32-bit halves into one 64-bit size.
            LARGE_INTEGER filesize;
            filesize.LowPart = ffd.nFileSizeLow;
            filesize.HighPart = ffd.nFileSizeHigh;
            // QuadPart is 64 bits wide, so it needs %lld rather than %ld.
            _tprintf(TEXT("%s   %lld bytes\n"), szChild, static_cast<long long>(filesize.QuadPart));
        }
    } while (FindNextFile(hFind, &ffd) != 0);
    FindClose(hFind);

    if (isEmpty)
        _tprintf(TEXT("%s   [DIR]\n"), szDir);
    return true;
}

用法:

// Example: list everything beneath the root of drive D:.
// The buffer is MAX_PATH characters long and writable because RetrieveFile
// takes a non-const TCHAR*.
TCHAR rootDir[MAX_PATH] = _T("D:\\");
RetrieveFile(rootDir);