// monstercat-downloader/curl_dl.cpp
//
// 438 lines
// 8.6 KiB
// C++

#include "curl_dl.h"
#include <thread>
#include <sstream>
#include <string.h>
using std::chrono::duration;
using std::chrono::time_point;
using std::chrono::steady_clock;
using std::chrono::milliseconds;
using std::map;
using std::ostream;
using std::string;
using std::stringstream;
using std::this_thread::sleep_for;
using std::vector;
using nlohmann::json;
// libcurl write callback (CURLOPT_WRITEFUNCTION): appends one received chunk
// to the std::ostream that was registered through CURLOPT_WRITEDATA.
//
// Parameters:
//   ptr      - start of the received bytes (binary data, not NUL-terminated).
//   size     - size of one element in bytes.
//   nmemb    - number of elements in this chunk.
//   userdata - the destination ostream*, or nullptr to discard the data.
//
// Returns the number of bytes consumed. libcurl treats any value other than
// size * nmemb as a write error and aborts the transfer, so 0 is returned
// when the destination stream is in a failed state.
static size_t curl_write_func(void *ptr, size_t size, size_t nmemb, void *userdata)
{
    ostream *out = static_cast<ostream*>(userdata);
    const size_t total = nmemb * size;

    // No sink configured: claim everything was consumed so the transfer goes on.
    if (nullptr == out)
    {
        return total;
    }

    // Nothing to write for an empty chunk.
    if (0 == total)
    {
        return 0;
    }

    // One unformatted bulk write instead of a formatted insertion per byte.
    out->write(static_cast<const char*>(ptr), static_cast<std::streamsize>(total));

    // A failed stream cannot store the data; tell libcurl instead of silently
    // dropping the rest of the download.
    if (out->fail())
    {
        return 0;
    }

    return total;
}
// libcurl header callback (CURLOPT_HEADERFUNCTION): parses one response header
// line and stores it in the map registered through CURLOPT_HEADERDATA.
//
// The line is split at the first ':'. Both parts are stripped of surrounding
// whitespace and the name is converted to lowercase (ASCII only), so callers
// look headers up by lowercase name. A line without ':' (for example the
// status line) is stored under its lowercased text with an empty value.
// Lines whose name is empty after trimming - notably the blank line that
// terminates the header block - are ignored instead of being stored under
// the key "". A repeated header name overwrites the earlier value.
//
// Parameters:
//   ptr      - start of the header line (not NUL-terminated).
//   size     - size of one element in bytes.
//   nmemb    - number of elements in this line.
//   userdata - the destination map<string, string>*, or nullptr to ignore.
//
// Returns size * nmemb, i.e. the whole line is always reported as consumed.
static size_t curl_header_func(void *ptr, size_t size, size_t nmemb, void *userdata)
{
    static constexpr char whitespace[] = " \r\n\t\f\v";

    map<string, string> *out = static_cast<map<string, string> *>(userdata);
    const size_t total = nmemb * size;

    if (nullptr == out)
    {
        return total;
    }

    // Strips leading and trailing whitespace in place. npos from either
    // search makes the matching erase() clear the whole (all-blank) string.
    const auto trim = [](string& text)
    {
        text.erase(0, text.find_first_not_of(whitespace));
        text.erase(text.find_last_not_of(whitespace) + 1);
    };

    // Copy the raw line once; it may contain any bytes and has no terminator.
    const string full(static_cast<const char*>(ptr), total);

    // Split at the first colon only, so values such as URLs stay intact.
    const size_t colon = full.find(':');
    string name = full.substr(0, colon);
    string value;
    if (colon != string::npos)
    {
        value = full.substr(colon + 1);
    }

    trim(name);
    trim(value);

    // Blank separator line (or a line with no name): nothing to record.
    if (name.empty())
    {
        return total;
    }

    // Header names are case-insensitive; normalise to lowercase ASCII.
    for (char& c : name)
    {
        if ((c >= 'A') && (c <= 'Z'))
        {
            c = static_cast<char>(c - 'A' + 'a');
        }
    }

    (*out)[name] = value;
    return total;
}
// Creates the libcurl easy handle used by all transfers and stores it in
// m_handle. The result is stored unchecked; download() and post_json()
// test m_handle for nullptr right after calling this.
void CURL_DL::open_curl()
{
    m_handle = curl_easy_init();
}
// Blocks the calling thread until the interval of `limit` has elapsed since
// the time stamp last recorded for that limit by save_rate_limit().
//
// Does nothing when the limit has no name, when its interval (m_ms, in
// milliseconds) is not positive, or when no time stamp has been recorded
// under that name yet.
void CURL_DL::check_rate_limit(const RateLimit& limit)
{
    // Unnamed limits and non-positive intervals never throttle.
    if (limit.m_name.empty() || !(limit.m_ms > 0))
    {
        return;
    }

    // First use of this limit: there is nothing to wait for.
    const auto previous = m_limits.find(limit.m_name);
    if (previous == m_limits.end())
    {
        return;
    }

    const time_point<steady_clock> wait_end = previous->second + duration<int, std::milli>(limit.m_ms);
    const time_point<steady_clock> now = steady_clock::now();
    if (now < wait_end)
    {
        sleep_for(wait_end - now);
    }
}
// Records the current steady-clock time as the latest use of `limit`, keyed
// by the limit's name. Limits without a name are not tracked.
void CURL_DL::save_rate_limit(const RateLimit& limit)
{
    if (limit.m_name.empty())
    {
        return;
    }

    const auto finished_at = steady_clock::now();
    m_limits[limit.m_name] = finished_at;
}
CURL_DL::CURL_DL()
: m_handle(nullptr)
{
}
// Releases the libcurl easy handle, if one was ever created.
CURL_DL::~CURL_DL()
{
    if (nullptr == m_handle)
    {
        return;
    }

    curl_easy_cleanup(m_handle);
    m_handle = nullptr;
}
// Returns the single shared CURL_DL instance, a function-local static that
// is constructed the first time this function is called.
CURL_DL& CURL_DL::get_handle()
{
    static CURL_DL instance;
    return instance;
}
bool CURL_DL::download(const string& url, ostream* out, RateLimit limit)
{
return download(url, out, nullptr, nullptr, nullptr, limit);
}
bool CURL_DL::download(const string& url, ostream* out,
const vector<string> *headers, RateLimit limit)
{
return download(url, out, headers, nullptr, nullptr, limit);
}
bool CURL_DL::download(const string& url, ostream* out,
const vector<string> *headers, map<string, string> *out_headers,
RateLimit limit)
{
return download(url, out, headers, out_headers, nullptr, limit);
}
bool CURL_DL::download(const string& url, ostream* out,
const vector<string> *headers, map<string, string> *out_headers,
const map<string, string> *params, RateLimit limit)
{
bool result = true;
CURLcode error;
struct curl_slist *header_list = nullptr;
long http_code;
if (nullptr == m_handle)
{
open_curl();
}
if (nullptr == m_handle)
{
return false;
}
curl_easy_reset(m_handle);
// Enable cookie engine
curl_easy_setopt(m_handle, CURLOPT_COOKIEFILE, "cookies.txt");
// Enable error messages
curl_easy_setopt(m_handle, CURLOPT_ERRORBUFFER, m_error);
// Set User-Agent
curl_easy_setopt(m_handle, CURLOPT_USERAGENT, "Internedko Downloader");
if ((nullptr == params) || (params->count("no-redir") == 0))
{
// Set Auto Follow (Max 32 Times)
curl_easy_setopt(m_handle, CURLOPT_FOLLOWLOCATION, 1);
curl_easy_setopt(m_handle, CURLOPT_MAXREDIRS, 32);
}
else
{
curl_easy_setopt(m_handle, CURLOPT_FOLLOWLOCATION, 0);
}
// Allow only HTTP and HTTPS (STR after 7.85.0)
// curl_easy_setopt(m_handle, CURLOPT_PROTOCOLS_STR, "http,https");
curl_easy_setopt(m_handle, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
// Do Not Return "OK" On 4xx And 5xx
curl_easy_setopt(m_handle, CURLOPT_FAILONERROR, 0);
// Set Output Func
curl_easy_setopt(m_handle, CURLOPT_WRITEFUNCTION, curl_write_func);
curl_easy_setopt(m_handle, CURLOPT_WRITEDATA, out);
// Set URL
curl_easy_setopt(m_handle, CURLOPT_URL, url.c_str());
// Set Get
curl_easy_setopt(m_handle, CURLOPT_HTTPGET, 1);
// Set Headers
if (nullptr != headers)
{
for (const string& s : *headers)
{
header_list = curl_slist_append(header_list, s.c_str());
}
}
curl_easy_setopt(m_handle, CURLOPT_HTTPHEADER, header_list);
if (nullptr != out_headers)
{
curl_easy_setopt(m_handle, CURLOPT_HEADERFUNCTION, curl_header_func);
curl_easy_setopt(m_handle, CURLOPT_HEADERDATA, out_headers);
}
// Set Parameters
if (nullptr != params)
{
// Username
if (params->count("user") != 0)
{
curl_easy_setopt(m_handle, CURLOPT_USERNAME, (*params).at("user").c_str());
}
// Password
if (params->count("pass") != 0)
{
curl_easy_setopt(m_handle, CURLOPT_PASSWORD, (*params).at("pass").c_str());
}
}
check_rate_limit(limit);
error = curl_easy_perform(m_handle);
if (error != CURLE_OK)
{
result = false;
}
error = curl_easy_getinfo(m_handle, CURLINFO_RESPONSE_CODE, &http_code);
if (error != CURLE_OK)
{
result = false;
}
if (http_code >= 400)
{
result = false;
}
save_rate_limit(limit);
// Cleanup
if (nullptr != header_list)
{
curl_slist_free_all(header_list);
}
header_list = nullptr;
return result;
}
bool CURL_DL::post_json(const string& url, json& j, ostream* out, RateLimit limit)
{
return post_json(url, j, out, nullptr, limit);
}
// Sends `j`, serialised with dump(), as the body of an HTTP(S) POST to `url`
// with a "Content-Type: application/json" header.
//
// Parameters:
//   url     - address to post to.
//   j       - JSON document to send.
//   out     - stream that receives the response body; nullptr discards it.
//   headers - optional extra request header lines ("Name: value"), appended
//             after the Content-Type header.
//   limit   - rate limit that is waited for before the transfer and recorded
//             after it.
//
// Returns true when the transfer succeeded. FAILONERROR is enabled, so an
// HTTP status of 400 or above makes the transfer fail. Returns false when no
// handle could be created or the header list could not be allocated. libcurl
// writes its error text into m_error, which get_error() exposes.
bool CURL_DL::post_json(const string& url, json& j, ostream* out,
    const vector<string> *headers, RateLimit limit)
{
    bool result = true;
    CURLcode error;
    struct curl_slist *header_list = nullptr;

    if (nullptr == m_handle)
    {
        open_curl();
    }
    if (nullptr == m_handle)
    {
        return false;
    }

    // Serialise before the header list is allocated: if dump() throws, there
    // is no list yet that would leak.
    const string body = j.dump();

    // Start from default options; the handle is reused between requests.
    curl_easy_reset(m_handle);

    // Enable cookie engine (in-memory only, no cookie file is read)
    curl_easy_setopt(m_handle, CURLOPT_COOKIEFILE, "");

    // Enable error messages
    curl_easy_setopt(m_handle, CURLOPT_ERRORBUFFER, m_error);

    // Set User-Agent
    curl_easy_setopt(m_handle, CURLOPT_USERAGENT, "Internedko Archiver");

    // NOTE: numeric options are passed as long (1L, 32L, ...). libcurl reads
    // them through varargs as long, so an int argument is not portable.

    // Set Auto Follow (Max 32 Times)
    curl_easy_setopt(m_handle, CURLOPT_FOLLOWLOCATION, 1L);
    curl_easy_setopt(m_handle, CURLOPT_MAXREDIRS, 32L);

    // Allow only HTTP and HTTPS (STR after 7.85.0)
    // curl_easy_setopt(m_handle, CURLOPT_PROTOCOLS_STR, "http,https");
    curl_easy_setopt(m_handle, CURLOPT_PROTOCOLS,
        static_cast<long>(CURLPROTO_HTTP | CURLPROTO_HTTPS));

    // Do Not Return "OK" On 4xx And 5xx
    curl_easy_setopt(m_handle, CURLOPT_FAILONERROR, 1L);

    // Set Output Func
    curl_easy_setopt(m_handle, CURLOPT_WRITEFUNCTION, curl_write_func);
    curl_easy_setopt(m_handle, CURLOPT_WRITEDATA, out);

    // Set URL
    curl_easy_setopt(m_handle, CURLOPT_URL, url.c_str());

    // Set Post
    curl_easy_setopt(m_handle, CURLOPT_POST, 1L);

    // Set Headers
    // curl_slist_append() returns nullptr on failure and leaves the existing
    // list untouched, so the old head is kept until each append succeeds.
    header_list = curl_slist_append(header_list, "Content-Type: application/json");
    if (nullptr == header_list)
    {
        return false;
    }
    if (nullptr != headers)
    {
        for (const string& s : *headers)
        {
            struct curl_slist *grown = curl_slist_append(header_list, s.c_str());
            if (nullptr == grown)
            {
                curl_slist_free_all(header_list);
                return false;
            }
            header_list = grown;
        }
    }
    curl_easy_setopt(m_handle, CURLOPT_HTTPHEADER, header_list);

    // Set Data (COPYPOSTFIELDS makes libcurl keep its own copy of the body)
    curl_easy_setopt(m_handle, CURLOPT_COPYPOSTFIELDS, body.c_str());

    check_rate_limit(limit);

    error = curl_easy_perform(m_handle);
    if (error != CURLE_OK)
    {
        result = false;
    }

    save_rate_limit(limit);

    // Cleanup
    curl_slist_free_all(header_list);
    header_list = nullptr;

    return result;
}
// Returns a copy of the text currently held in m_error, the buffer that
// download() and post_json() register with libcurl as CURLOPT_ERRORBUFFER.
string CURL_DL::get_error() const
{
    return string(m_error);
}
// Percent-encodes `input` for use inside a URL.
//
// ASCII letters and digits are copied unchanged. Every other byte is written
// as '%' followed by exactly two uppercase hexadecimal digits. That includes
// control characters and bytes >= 0x7F (e.g. the bytes of UTF-8 sequences),
// which would otherwise end up raw in the URL. Printable ASCII punctuation,
// including '-', '.', '_' and '~', is encoded as well.
//
// Parameters:
//   input - raw text to encode; may contain arbitrary bytes.
//
// Returns the encoded string.
string CURL_DL::url_encode(const string& input)
{
    static const char hex_digits[] = "0123456789ABCDEF";

    string result;
    result.reserve(input.size());

    for (const char ch : input)
    {
        // Work on the unsigned value so bytes >= 0x80 are not negative.
        const unsigned char byte = static_cast<unsigned char>(ch);

        const bool is_alnum = ((byte >= '0') && (byte <= '9')) ||
                              ((byte >= 'A') && (byte <= 'Z')) ||
                              ((byte >= 'a') && (byte <= 'z'));

        if (is_alnum)
        {
            result += ch;
        }
        else
        {
            result += '%';
            result += hex_digits[byte >> 4];
            result += hex_digits[byte & 0x0F];
        }
    }

    return result;
}
// Computes the target of a redirect from the requested `url` and the
// `location` value returned by the server.
//
//   - location starting with "//" (network-path reference) and a base url
//     that contains "://": the scheme of `url` is prepended to `location`.
//   - location starting with "/": the path of `url` is replaced, giving
//     scheme + host of `url` followed by `location`.
//   - anything else (including an empty location): `location` is returned
//     unchanged; it is taken to be a complete URL already.
//
// Parameters:
//   url      - the URL that was requested (taken by value, used as scratch).
//   location - the redirect target as sent by the server.
//
// Returns the URL to follow.
string CURL_DL::calc_redir(string url, const string& location)
{
    // Only a leading '/' needs the base URL; everything else is passed through.
    if (location.empty() || (location[0] != '/'))
    {
        return location;
    }

    // Query and fragment of the base never take part in the result; removing
    // them first also keeps a '/' inside them from being taken for the path.
    size_t pos = url.find_first_of("?#");
    if (string::npos != pos)
    {
        url.erase(pos);
    }

    const size_t scheme_end = url.find("://");

    // "//host/path" keeps only the scheme of the base ("https:" + location).
    if ((location.size() >= 2) && (location[1] == '/') && (string::npos != scheme_end))
    {
        return url.substr(0, scheme_end + 1) + location;
    }

    // Find the first '/' after "scheme://", or the first '/' at all when the
    // base has no scheme; that is where the path of the base starts.
    if (string::npos == scheme_end)
    {
        pos = url.find('/');
    }
    else
    {
        pos = url.find('/', scheme_end + 3);
    }

    // Cut the old path (if any) and attach the new absolute path.
    if (string::npos != pos)
    {
        url.erase(pos);
    }
    url += location;
    return url;
}