#include "curl_dl.h"

#include <chrono>
#include <map>
#include <ostream>
#include <sstream>
#include <string>
#include <thread>
#include <vector>

using std::chrono::duration;
using std::chrono::time_point;
using std::chrono::steady_clock;
using std::chrono::milliseconds;
using std::map;
using std::ostream;
using std::string;
using std::stringstream;
using std::this_thread::sleep_for;
using std::vector;

using nlohmann::json;

// libcurl write callback (CURLOPT_WRITEFUNCTION).
// Forwards the received bytes to the std::ostream supplied through
// CURLOPT_WRITEDATA. A null stream discards the data. Always reports the
// whole chunk as consumed so libcurl keeps the transfer going.
static size_t curl_write_func(void *ptr, size_t size, size_t nmemb, void *userdata)
{
    ostream *out = static_cast<ostream *>(userdata);
    const size_t total = nmemb * size;

    if (nullptr != out) {
        // One unformatted write instead of a formatted insert per byte.
        out->write(static_cast<const char *>(ptr), static_cast<std::streamsize>(total));
    }

    return total;
}

// Removes leading and trailing whitespace from text in place.
// A string made only of whitespace becomes empty.
static void trim_whitespace(string &text)
{
    constexpr char whitespace[] = " \r\n\t\f\v";

    const size_t first = text.find_first_not_of(whitespace);
    if (string::npos == first) {
        text.clear();
        return;
    }

    text.erase(text.find_last_not_of(whitespace) + 1);
    text.erase(0, first);
}

// libcurl header callback (CURLOPT_HEADERFUNCTION).
// Splits one received header line at the first ':' into name and value,
// trims both, lowercases the name (ASCII only) and stores the pair in the
// map supplied through CURLOPT_HEADERDATA. A line without ':' is stored with
// the whole trimmed line as the name and an empty value. A repeated name
// overwrites the earlier value.
static size_t curl_header_func(void *ptr, size_t size, size_t nmemb, void *userdata)
{
    map<string, string> *out = static_cast<map<string, string> *>(userdata);
    const size_t total = nmemb * size;

    if (nullptr == out) {
        return total;
    }

    // Split into 2 strings
    const string full(static_cast<const char *>(ptr), total);
    const size_t colon = full.find(':');
    string name = full.substr(0, colon);
    string value;
    if (colon != string::npos) {
        value = full.substr(colon + 1);
    }

    // Clean whitespace
    trim_whitespace(name);
    trim_whitespace(value);

    // Make header name lowercase
    for (char &c : name) {
        if ((c >= 'A') && (c <= 'Z')) {
            c = static_cast<char>(c + ('a' - 'A'));
        }
    }

    // Insert header
    (*out)[name] = value;

    return total;
}

// Appends every string in headers (may be null) to the curl_slist list.
// Returns false if libcurl could not grow the list; list then still points
// at the entries appended so far, so the caller can free them.
static bool append_headers(struct curl_slist *&list, const vector<string> *headers)
{
    if (nullptr == headers) {
        return true;
    }

    for (const string &s : *headers) {
        // curl_slist_append returns NULL on failure; keep the old head so it
        // is not leaked.
        struct curl_slist *grown = curl_slist_append(list, s.c_str());
        if (nullptr == grown) {
            return false;
        }
        list = grown;
    }

    return true;
}

// Creates the easy handle used by all requests of this object.
void CURL_DL::open_curl()
{
    m_handle = curl_easy_init();
}

// Sleeps until at least limit.m_ms milliseconds have passed since the last
// request recorded under limit.m_name. Does nothing for an unnamed limit, a
// non-positive interval, or a name that has no recorded request yet.
void CURL_DL::check_rate_limit(const RateLimit& limit)
{
    if (limit.m_name.empty() || !(limit.m_ms > 0)) {
        return;
    }

    const auto previous = m_limits.find(limit.m_name);
    if (previous == m_limits.end()) {
        return;
    }

    // NOTE(review): assumes m_ms is an integral millisecond count - confirm
    // against the RateLimit declaration.
    const auto wait_end = previous->second + milliseconds(limit.m_ms);
    const auto now = steady_clock::now();
    if (now < wait_end) {
        sleep_for(wait_end - now);
    }
}

// Records "now" as the time of the last request made under limit.m_name.
void CURL_DL::save_rate_limit(const RateLimit& limit)
{
    if (!limit.m_name.empty()) {
        m_limits[limit.m_name] = steady_clock::now();
    }
}

// The easy handle is created lazily on the first request.
CURL_DL::CURL_DL() : m_handle(nullptr)
{
}

CURL_DL::~CURL_DL()
{
    if (m_handle != nullptr) {
        curl_easy_cleanup(m_handle);
        m_handle = nullptr;
    }
}

// Returns the single shared CURL_DL instance (function-local static).
CURL_DL& CURL_DL::get_handle()
{
    static CURL_DL obj;
    return obj;
}

// Convenience overload: no request headers, no header capture, no parameters.
bool CURL_DL::download(const string& url, ostream* out, RateLimit limit)
{
    return download(url, out, nullptr, nullptr, nullptr, limit);
}

// Convenience overload: request headers only.
bool CURL_DL::download(const string& url, ostream* out, const vector<string> *headers, RateLimit limit)
{
    return download(url, out, headers, nullptr, nullptr, limit);
}

// Convenience overload: request headers and response-header capture.
bool CURL_DL::download(const string& url, ostream* out, const vector<string> *headers, map<string, string> *out_headers, RateLimit limit)
{
    return download(url, out, headers, out_headers, nullptr, limit);
}

// Performs an HTTP GET of url.
//
//   out          stream receiving the response body (null discards it)
//   headers      extra request header lines, or null
//   out_headers  receives response headers keyed by lowercase name, or null
//   params       optional settings, or null:
//                  "no-redir" (presence only) disables redirect following
//                  "user" / "pass" set the request credentials
//   limit        rate limit applied before the request and updated after it
//
// Returns true only if the transfer succeeded and the HTTP status is below
// 400. On failure get_error() holds libcurl's message, if it produced one.
bool CURL_DL::download(const string& url, ostream* out, const vector<string> *headers, map<string, string> *out_headers, const map<string, string> *params, RateLimit limit)
{
    struct curl_slist *header_list = nullptr;
    long http_code = 0;

    if (nullptr == m_handle) {
        open_curl();
    }

    if (nullptr == m_handle) {
        return false;
    }

    curl_easy_reset(m_handle);

    // Drop any message left over from a previous request
    m_error[0] = '\0';

    // Enable cookie engine
    curl_easy_setopt(m_handle, CURLOPT_COOKIEFILE, "cookies.txt");

    // Enable error messages
    curl_easy_setopt(m_handle, CURLOPT_ERRORBUFFER, m_error);

    // Set User-Agent
    curl_easy_setopt(m_handle, CURLOPT_USERAGENT, "Internedko Downloader");

    // Numeric options are read by libcurl as long, hence the L suffixes.
    if ((nullptr == params) || (params->count("no-redir") == 0)) {
        // Set Auto Follow (Max 32 Times)
        curl_easy_setopt(m_handle, CURLOPT_FOLLOWLOCATION, 1L);
        curl_easy_setopt(m_handle, CURLOPT_MAXREDIRS, 32L);
    } else {
        curl_easy_setopt(m_handle, CURLOPT_FOLLOWLOCATION, 0L);
    }

    // Allow only HTTP and HTTPS (STR after 7.85.0)
    // curl_easy_setopt(m_handle, CURLOPT_PROTOCOLS_STR, "http,https");
    curl_easy_setopt(m_handle, CURLOPT_PROTOCOLS, static_cast<long>(CURLPROTO_HTTP | CURLPROTO_HTTPS));

    // Let libcurl deliver 4xx/5xx responses normally; the status code is
    // checked below after the transfer
    curl_easy_setopt(m_handle, CURLOPT_FAILONERROR, 0L);

    // Set Output Func
    curl_easy_setopt(m_handle, CURLOPT_WRITEFUNCTION, curl_write_func);
    curl_easy_setopt(m_handle, CURLOPT_WRITEDATA, out);

    // Set URL
    curl_easy_setopt(m_handle, CURLOPT_URL, url.c_str());

    // Set Get
    curl_easy_setopt(m_handle, CURLOPT_HTTPGET, 1L);

    // Set Headers
    if (!append_headers(header_list, headers)) {
        curl_slist_free_all(header_list);
        return false;
    }
    curl_easy_setopt(m_handle, CURLOPT_HTTPHEADER, header_list);

    if (nullptr != out_headers) {
        curl_easy_setopt(m_handle, CURLOPT_HEADERFUNCTION, curl_header_func);
        curl_easy_setopt(m_handle, CURLOPT_HEADERDATA, out_headers);
    }

    // Set Parameters
    if (nullptr != params) {
        // Username
        const auto user = params->find("user");
        if (user != params->end()) {
            curl_easy_setopt(m_handle, CURLOPT_USERNAME, user->second.c_str());
        }

        // Password
        const auto pass = params->find("pass");
        if (pass != params->end()) {
            curl_easy_setopt(m_handle, CURLOPT_PASSWORD, pass->second.c_str());
        }
    }

    check_rate_limit(limit);

    bool result = (curl_easy_perform(m_handle) == CURLE_OK);

    // http_code is only meaningful when getinfo itself succeeded
    if (curl_easy_getinfo(m_handle, CURLINFO_RESPONSE_CODE, &http_code) != CURLE_OK) {
        result = false;
    } else if (http_code >= 400) {
        result = false;
    }

    save_rate_limit(limit);

    // Cleanup
    if (nullptr != header_list) {
        curl_slist_free_all(header_list);
    }

    return result;
}

// Convenience overload: no extra request headers.
bool CURL_DL::post_json(const string& url, json& j, ostream* out, RateLimit limit)
{
    return post_json(url, j, out, nullptr, limit);
}

// Performs an HTTP POST of the serialized JSON document j to url with
// "Content-Type: application/json".
//
//   out      stream receiving the response body (null discards it)
//   headers  extra request header lines, or null
//   limit    rate limit applied before the request and updated after it
//
// Returns true if the transfer succeeded. CURLOPT_FAILONERROR is enabled
// here, so libcurl reports HTTP statuses of 400 and above as a failure.
bool CURL_DL::post_json(const string& url, json& j, ostream* out, const vector<string> *headers, RateLimit limit)
{
    struct curl_slist *header_list = nullptr;

    if (nullptr == m_handle) {
        open_curl();
    }

    if (nullptr == m_handle) {
        return false;
    }

    curl_easy_reset(m_handle);

    // Drop any message left over from a previous request
    m_error[0] = '\0';

    // Enable cookie engine (in-memory only, no file is read)
    curl_easy_setopt(m_handle, CURLOPT_COOKIEFILE, "");

    // Enable error messages
    curl_easy_setopt(m_handle, CURLOPT_ERRORBUFFER, m_error);

    // Set User-Agent
    curl_easy_setopt(m_handle, CURLOPT_USERAGENT, "Internedko Archiver");

    // Set Auto Follow (Max 32 Times)
    // Numeric options are read by libcurl as long, hence the L suffixes.
    curl_easy_setopt(m_handle, CURLOPT_FOLLOWLOCATION, 1L);
    curl_easy_setopt(m_handle, CURLOPT_MAXREDIRS, 32L);

    // Allow only HTTP and HTTPS (STR after 7.85.0)
    // curl_easy_setopt(m_handle, CURLOPT_PROTOCOLS_STR, "http,https");
    curl_easy_setopt(m_handle, CURLOPT_PROTOCOLS, static_cast<long>(CURLPROTO_HTTP | CURLPROTO_HTTPS));

    // Do Not Return "OK" On 4xx And 5xx
    curl_easy_setopt(m_handle, CURLOPT_FAILONERROR, 1L);

    // Set Output Func
    curl_easy_setopt(m_handle, CURLOPT_WRITEFUNCTION, curl_write_func);
    curl_easy_setopt(m_handle, CURLOPT_WRITEDATA, out);

    // Set URL
    curl_easy_setopt(m_handle, CURLOPT_URL, url.c_str());

    // Set Post
    curl_easy_setopt(m_handle, CURLOPT_POST, 1L);

    // Set Headers
    header_list = curl_slist_append(header_list, "Content-Type: application/json");
    if ((nullptr == header_list) || !append_headers(header_list, headers)) {
        curl_slist_free_all(header_list);
        return false;
    }
    curl_easy_setopt(m_handle, CURLOPT_HTTPHEADER, header_list);

    // Set Data (libcurl copies the buffer, so the local may go out of scope)
    const string body = j.dump();
    curl_easy_setopt(m_handle, CURLOPT_COPYPOSTFIELDS, body.c_str());

    check_rate_limit(limit);

    const bool result = (curl_easy_perform(m_handle) == CURLE_OK);

    save_rate_limit(limit);

    // Cleanup
    curl_slist_free_all(header_list);

    return result;
}

// Returns the contents of the libcurl error buffer.
string CURL_DL::get_error() const
{
    return m_error;
}

// Percent-encodes input for use inside a URL.
// ASCII letters and digits are copied unchanged; every other byte, including
// control characters and bytes >= 0x80 (e.g. UTF-8 sequences), becomes %XX
// with uppercase hex digits.
string CURL_DL::url_encode(const string& input)
{
    static constexpr char hex_digits[] = "0123456789ABCDEF";

    string result;
    result.reserve(input.size());

    for (const char raw : input) {
        // Work on the unsigned value so bytes >= 0x80 are not negative.
        const unsigned char byte = static_cast<unsigned char>(raw);
        const bool is_alnum = ((byte >= '0') && (byte <= '9')) ||
                              ((byte >= 'A') && (byte <= 'Z')) ||
                              ((byte >= 'a') && (byte <= 'z'));

        if (is_alnum) {
            result += raw;
        } else {
            result += '%';
            result += hex_digits[byte >> 4];
            result += hex_digits[byte & 0x0F];
        }
    }

    return result;
}

// Resolves a redirect target against the URL that produced it.
//
//   "//host/path"   (with a scheme in url) -> scheme of url + ":" + location
//   "/path"         -> scheme and host of url + location
//   anything else   -> location returned unchanged (absolute URLs; relative
//                      paths without a leading '/' are not resolved)
string CURL_DL::calc_redir(string url, const string& location)
{
    if (location.empty() || (location[0] != '/')) {
        return location;
    }

    // The query and fragment of the base URL never take part in resolution
    size_t pos = url.find_first_of("?#");
    if (string::npos != pos) {
        url.erase(pos);
    }

    const size_t scheme_end = url.find("://");

    // Protocol-relative target: keep only the scheme of the base URL
    if ((location.size() > 1) && (location[1] == '/') && (string::npos != scheme_end)) {
        return url.substr(0, scheme_end + 1) + location;
    }

    if (string::npos == scheme_end) {
        pos = url.find('/');
    } else {
        pos = url.find('/', scheme_end + 3);
    }

    // Pos now points to first / after ://
    if (string::npos != pos) {
        url.erase(pos);
    }

    url += location;
    return url;
}