438 lines
8.6 KiB
C++
438 lines
8.6 KiB
C++
#include "curl_dl.h"
|
|
|
|
#include <thread>
|
|
#include <sstream>
|
|
|
|
#include <string.h>
|
|
|
|
using std::chrono::duration;
|
|
using std::chrono::time_point;
|
|
using std::chrono::steady_clock;
|
|
using std::chrono::milliseconds;
|
|
using std::map;
|
|
using std::ostream;
|
|
using std::string;
|
|
using std::stringstream;
|
|
using std::this_thread::sleep_for;
|
|
using std::vector;
|
|
using nlohmann::json;
|
|
|
|
// libcurl write callback (installed with CURLOPT_WRITEFUNCTION).
// Appends one chunk of the response body to the stream that was registered
// through CURLOPT_WRITEDATA.
//
// ptr      - start of the received chunk (raw bytes, not NUL-terminated)
// size     - size of one element
// nmemb    - number of elements in the chunk
// userdata - std::ostream* to append to, or nullptr to discard the chunk
//
// Returns size * nmemb in every case, i.e. the whole chunk is always
// reported as consumed (libcurl is documented to treat any other value as
// a write error).
static size_t curl_write_func(void *ptr, size_t size, size_t nmemb, void *userdata)
{
    std::ostream *out = static_cast<std::ostream*>(userdata);
    const size_t total = nmemb * size;

    if ((nullptr != out) && (total > 0))
    {
        // One unformatted bulk write instead of a formatted insertion per byte.
        out->write(static_cast<const char*>(ptr), static_cast<std::streamsize>(total));
    }

    return total;
}
|
|
|
|
// libcurl header callback (installed with CURLOPT_HEADERFUNCTION).
// Parses one received header line and stores it in the map that was
// registered through CURLOPT_HEADERDATA.
//
// ptr      - start of the header line (raw bytes, not NUL-terminated)
// size     - size of one element
// nmemb    - number of elements in the line
// userdata - std::map<std::string, std::string>* to fill, or nullptr to
//            ignore the line
//
// The line is split at the first ':'. Both halves are trimmed of
// surrounding whitespace and the name is lowercased (ASCII only). A line
// without ':' (for example a status line) is stored under its full
// lowercased text with an empty value. A line whose name is empty after
// trimming (the blank line that terminates a header block) is skipped so
// the map never gets a "" key. A repeated name overwrites the earlier
// value.
//
// Returns size * nmemb in every case.
static size_t curl_header_func(void *ptr, size_t size, size_t nmemb, void *userdata)
{
    std::map<std::string, std::string> *out =
        static_cast<std::map<std::string, std::string> *>(userdata);
    const size_t total = nmemb * size;
    constexpr char whitespace[] = " \r\n\t\f\v";

    if (nullptr == out)
    {
        return total;
    }

    // Copy the raw line in one go; the buffer is not NUL-terminated.
    const std::string full(static_cast<const char*>(ptr), total);

    // Split into name and value at the first colon
    const size_t colon = full.find(':');
    std::string name = full.substr(0, colon);
    std::string value;

    if (colon != std::string::npos)
    {
        value = full.substr(colon + 1);
    }

    // Clean whitespace on both ends. For an all-whitespace string the first
    // erase empties it and the second becomes erase(0), which is a no-op.
    const auto trim = [&whitespace](std::string& text)
    {
        text.erase(0, text.find_first_not_of(whitespace));
        text.erase(text.find_last_not_of(whitespace) + 1);
    };

    trim(name);
    trim(value);

    // Make header name lowercase
    for (char& ch : name)
    {
        if ((ch >= 'A') && (ch <= 'Z'))
        {
            ch += 'a' - 'A';
        }
    }

    // Insert header (the blank terminator line has no name and is dropped)
    if (!name.empty())
    {
        (*out)[name] = value;
    }

    return total;
}
|
|
|
|
void CURL_DL::open_curl()
|
|
{
|
|
m_handle = curl_easy_init();
|
|
}
|
|
|
|
void CURL_DL::check_rate_limit(const RateLimit& limit)
|
|
{
|
|
if (!limit.m_name.empty() && limit.m_ms > 0)
|
|
{
|
|
if (m_limits.count(limit.m_name) != 0)
|
|
{
|
|
time_point<steady_clock> wait_end = m_limits[limit.m_name] + duration<int, std::milli>(limit.m_ms);
|
|
time_point<steady_clock> now = steady_clock::now();
|
|
|
|
if (now < wait_end)
|
|
{
|
|
sleep_for(wait_end - now);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void CURL_DL::save_rate_limit(const RateLimit& limit)
|
|
{
|
|
if (!limit.m_name.empty())
|
|
{
|
|
m_limits[limit.m_name] = steady_clock::now();
|
|
}
|
|
}
|
|
|
|
CURL_DL::CURL_DL()
|
|
: m_handle(nullptr)
|
|
{
|
|
}
|
|
|
|
// Releases the libcurl easy handle, if one was ever created, and resets
// m_handle to nullptr.
CURL_DL::~CURL_DL()
{
    if (nullptr == m_handle)
    {
        return;
    }

    curl_easy_cleanup(m_handle);
    m_handle = nullptr;
}
|
|
|
|
// Returns a reference to the single function-local static CURL_DL
// instance, which is constructed the first time this function runs.
CURL_DL& CURL_DL::get_handle()
{
    static CURL_DL instance;

    return instance;
}
|
|
|
|
bool CURL_DL::download(const string& url, ostream* out, RateLimit limit)
|
|
{
|
|
return download(url, out, nullptr, nullptr, nullptr, limit);
|
|
}
|
|
|
|
bool CURL_DL::download(const string& url, ostream* out,
|
|
const vector<string> *headers, RateLimit limit)
|
|
{
|
|
return download(url, out, headers, nullptr, nullptr, limit);
|
|
}
|
|
|
|
bool CURL_DL::download(const string& url, ostream* out,
|
|
const vector<string> *headers, map<string, string> *out_headers,
|
|
RateLimit limit)
|
|
{
|
|
return download(url, out, headers, out_headers, nullptr, limit);
|
|
}
|
|
|
|
bool CURL_DL::download(const string& url, ostream* out,
|
|
const vector<string> *headers, map<string, string> *out_headers,
|
|
const map<string, string> *params, RateLimit limit)
|
|
{
|
|
bool result = true;
|
|
CURLcode error;
|
|
struct curl_slist *header_list = nullptr;
|
|
long http_code;
|
|
|
|
if (nullptr == m_handle)
|
|
{
|
|
open_curl();
|
|
}
|
|
|
|
if (nullptr == m_handle)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
curl_easy_reset(m_handle);
|
|
|
|
// Enable cookie engine
|
|
curl_easy_setopt(m_handle, CURLOPT_COOKIEFILE, "cookies.txt");
|
|
|
|
// Enable error messages
|
|
curl_easy_setopt(m_handle, CURLOPT_ERRORBUFFER, m_error);
|
|
|
|
// Set User-Agent
|
|
curl_easy_setopt(m_handle, CURLOPT_USERAGENT, "Internedko Downloader");
|
|
|
|
if ((nullptr == params) || (params->count("no-redir") == 0))
|
|
{
|
|
// Set Auto Follow (Max 32 Times)
|
|
curl_easy_setopt(m_handle, CURLOPT_FOLLOWLOCATION, 1);
|
|
curl_easy_setopt(m_handle, CURLOPT_MAXREDIRS, 32);
|
|
}
|
|
else
|
|
{
|
|
curl_easy_setopt(m_handle, CURLOPT_FOLLOWLOCATION, 0);
|
|
}
|
|
|
|
// Allow only HTTP and HTTPS (STR after 7.85.0)
|
|
// curl_easy_setopt(m_handle, CURLOPT_PROTOCOLS_STR, "http,https");
|
|
curl_easy_setopt(m_handle, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
|
|
|
|
// Do Not Return "OK" On 4xx And 5xx
|
|
curl_easy_setopt(m_handle, CURLOPT_FAILONERROR, 0);
|
|
|
|
// Set Output Func
|
|
curl_easy_setopt(m_handle, CURLOPT_WRITEFUNCTION, curl_write_func);
|
|
curl_easy_setopt(m_handle, CURLOPT_WRITEDATA, out);
|
|
|
|
// Set URL
|
|
curl_easy_setopt(m_handle, CURLOPT_URL, url.c_str());
|
|
|
|
// Set Get
|
|
curl_easy_setopt(m_handle, CURLOPT_HTTPGET, 1);
|
|
|
|
// Set Headers
|
|
if (nullptr != headers)
|
|
{
|
|
for (const string& s : *headers)
|
|
{
|
|
header_list = curl_slist_append(header_list, s.c_str());
|
|
}
|
|
}
|
|
curl_easy_setopt(m_handle, CURLOPT_HTTPHEADER, header_list);
|
|
|
|
if (nullptr != out_headers)
|
|
{
|
|
curl_easy_setopt(m_handle, CURLOPT_HEADERFUNCTION, curl_header_func);
|
|
curl_easy_setopt(m_handle, CURLOPT_HEADERDATA, out_headers);
|
|
}
|
|
|
|
// Set Parameters
|
|
if (nullptr != params)
|
|
{
|
|
// Username
|
|
if (params->count("user") != 0)
|
|
{
|
|
curl_easy_setopt(m_handle, CURLOPT_USERNAME, (*params).at("user").c_str());
|
|
}
|
|
|
|
// Password
|
|
if (params->count("pass") != 0)
|
|
{
|
|
curl_easy_setopt(m_handle, CURLOPT_PASSWORD, (*params).at("pass").c_str());
|
|
}
|
|
}
|
|
|
|
check_rate_limit(limit);
|
|
|
|
error = curl_easy_perform(m_handle);
|
|
if (error != CURLE_OK)
|
|
{
|
|
result = false;
|
|
}
|
|
|
|
error = curl_easy_getinfo(m_handle, CURLINFO_RESPONSE_CODE, &http_code);
|
|
if (error != CURLE_OK)
|
|
{
|
|
result = false;
|
|
}
|
|
|
|
if (http_code >= 400)
|
|
{
|
|
result = false;
|
|
}
|
|
|
|
save_rate_limit(limit);
|
|
|
|
// Cleanup
|
|
if (nullptr != header_list)
|
|
{
|
|
curl_slist_free_all(header_list);
|
|
}
|
|
header_list = nullptr;
|
|
|
|
return result;
|
|
}
|
|
|
|
bool CURL_DL::post_json(const string& url, json& j, ostream* out, RateLimit limit)
|
|
{
|
|
return post_json(url, j, out, nullptr, limit);
|
|
}
|
|
|
|
bool CURL_DL::post_json(const string& url, json& j, ostream* out,
|
|
const vector<string> *headers, RateLimit limit)
|
|
{
|
|
bool result = true;
|
|
CURLcode error;
|
|
struct curl_slist *header_list = nullptr;
|
|
|
|
if (nullptr == m_handle)
|
|
{
|
|
open_curl();
|
|
}
|
|
|
|
if (nullptr == m_handle)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
curl_easy_reset(m_handle);
|
|
|
|
curl_easy_setopt(m_handle, CURLOPT_COOKIEFILE, "");
|
|
|
|
curl_easy_setopt(m_handle, CURLOPT_ERRORBUFFER, m_error);
|
|
|
|
// Set User-Agent
|
|
curl_easy_setopt(m_handle, CURLOPT_USERAGENT, "Internedko Archiver");
|
|
|
|
// Set Auto Follow (Max 32 Times)
|
|
curl_easy_setopt(m_handle, CURLOPT_FOLLOWLOCATION, 1);
|
|
curl_easy_setopt(m_handle, CURLOPT_MAXREDIRS, 32);
|
|
|
|
// Allow only HTTP and HTTPS (STR after 7.85.0)
|
|
// curl_easy_setopt(m_handle, CURLOPT_PROTOCOLS_STR, "http,https");
|
|
curl_easy_setopt(m_handle, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
|
|
|
|
// Do Not Return "OK" On 4xx And 5xx
|
|
curl_easy_setopt(m_handle, CURLOPT_FAILONERROR, 1);
|
|
|
|
// Set Output Func
|
|
curl_easy_setopt(m_handle, CURLOPT_WRITEFUNCTION, curl_write_func);
|
|
curl_easy_setopt(m_handle, CURLOPT_WRITEDATA, out);
|
|
|
|
// Set URL
|
|
curl_easy_setopt(m_handle, CURLOPT_URL, url.c_str());
|
|
|
|
// Set Post
|
|
curl_easy_setopt(m_handle, CURLOPT_POST, 1);
|
|
|
|
// Set Headers
|
|
header_list = curl_slist_append(header_list, "Content-Type: application/json");
|
|
if (nullptr != headers)
|
|
{
|
|
for (const string& s : *headers)
|
|
{
|
|
header_list = curl_slist_append(header_list, s.c_str());
|
|
}
|
|
}
|
|
curl_easy_setopt(m_handle, CURLOPT_HTTPHEADER, header_list);
|
|
|
|
// Set Data
|
|
curl_easy_setopt(m_handle, CURLOPT_COPYPOSTFIELDS, j.dump().c_str());
|
|
|
|
check_rate_limit(limit);
|
|
|
|
error = curl_easy_perform(m_handle);
|
|
if (error != CURLE_OK)
|
|
{
|
|
result = false;
|
|
}
|
|
|
|
save_rate_limit(limit);
|
|
|
|
// Cleanup
|
|
curl_slist_free_all(header_list);
|
|
header_list = nullptr;
|
|
|
|
return result;
|
|
}
|
|
|
|
// Returns a copy of the text currently held in m_error, the buffer that
// download() and post_json() register with libcurl as CURLOPT_ERRORBUFFER.
string CURL_DL::get_error() const
{
    string message(m_error);

    return message;
}
|
|
|
|
// Percent-encodes `input` for use inside a URL.
//
// ASCII letters and digits are copied unchanged. Every other byte is
// written as '%' followed by two uppercase hex digits. That includes
// control characters, 0x7F and bytes >= 0x80 (for example UTF-8 sequences).
// Punctuation such as '-', '.', '_' and '~' is encoded as well.
//
// Returns the encoded string; an empty input gives an empty result.
string CURL_DL::url_encode(const string& input)
{
    static const char hex_digits[] = "0123456789ABCDEF";

    string result;
    result.reserve(input.size());

    for (const char ch : input)
    {
        // Work on the unsigned value so bytes >= 0x80 are not seen as
        // negative where plain char is signed.
        const unsigned char byte = static_cast<unsigned char>(ch);

        const bool is_alnum =
            ((byte >= '0') && (byte <= '9')) ||
            ((byte >= 'A') && (byte <= 'Z')) ||
            ((byte >= 'a') && (byte <= 'z'));

        if (is_alnum)
        {
            result += ch;
        }
        else
        {
            result += '%';
            result += hex_digits[byte >> 4];
            result += hex_digits[byte & 0x0F];
        }
    }

    return result;
}
|
|
|
|
// Computes the target of a redirect.
//
// url      - address that was requested (taken by value, used as scratch)
// location - value of the Location header
//
// Rules applied, in order:
//   * location does not start with '/'  -> returned unchanged (this also
//     covers an empty location; relative paths are NOT resolved)
//   * location starts with "//"         -> only the scheme of `url` is
//     kept ("https:" + location); when `url` has no "://" the location is
//     returned unchanged
//   * location starts with a single '/' -> scheme and host of `url`
//     followed by location
//
// The query string and fragment of `url` are dropped before the host is
// located, so a '/' or "://" inside them is never mistaken for part of the
// authority.
string CURL_DL::calc_redir(string url, const string& location)
{
    if (location.empty() || (location[0] != '/'))
    {
        return location;
    }

    // Strip query and fragment from the base address
    size_t pos = url.find_first_of("?#");
    if (string::npos != pos)
    {
        url.erase(pos);
    }

    const size_t scheme_end = url.find("://");

    // Network-path reference: the location names its own host
    if ((location.size() > 1) && (location[1] == '/'))
    {
        if (string::npos == scheme_end)
        {
            return location;
        }

        // Keep "scheme:" and let the location supply "//host/path"
        return url.substr(0, scheme_end + 1) + location;
    }

    // Find the first '/' after the authority
    if (string::npos == scheme_end)
    {
        pos = url.find('/');
    }
    else
    {
        pos = url.find('/', scheme_end + 3);
    }

    // Cut the old path off, leaving scheme and host only
    if (string::npos != pos)
    {
        url.erase(pos);
    }

    url += location;
    return url;
}
|