// monstercat-downloader/curl_dl.cpp

#include "curl_dl.h"

#include <thread>
#include <sstream>

#include <string.h>

using std::chrono::duration;
using std::chrono::time_point;
using std::chrono::steady_clock;
using std::chrono::milliseconds;
using std::map;
using std::ostream;
using std::string;
using std::stringstream;
using std::this_thread::sleep_for;
using std::vector;
using nlohmann::json;

// libcurl write callback (CURLOPT_WRITEFUNCTION): appends the received body
// bytes to a std::ostream.
//
// ptr      - buffer with the received data (not NUL terminated)
// size     - size of one element
// nmemb    - number of elements in ptr
// userdata - the CURLOPT_WRITEDATA pointer: an ostream*, or nullptr to
//            discard the data
//
// Returns the number of bytes consumed. libcurl aborts the transfer with a
// write error when this differs from size * nmemb, which is how a failed
// output stream is reported.
static size_t curl_write_func(void *ptr, size_t size, size_t nmemb, void *userdata)
{
	ostream *out = static_cast<ostream*>(userdata);
	const size_t total = nmemb * size;

	if (nullptr == out)
	{
		// No sink: claim everything was consumed so the transfer continues
		return total;
	}

	// One unformatted write: binary safe (ignores width/fill set on the
	// stream) and avoids a formatted insertion per byte
	out->write(static_cast<const char*>(ptr), static_cast<std::streamsize>(total));

	if (out->fail())
	{
		// Sink is broken (e.g. disk full); tell libcurl to stop
		return 0;
	}

	return total;
}

// libcurl header callback (CURLOPT_HEADERFUNCTION): parses one response
// header line and stores it in a map<string, string>.
//
// ptr      - buffer with one header line (not NUL terminated)
// size     - size of one element
// nmemb    - number of elements in ptr
// userdata - the CURLOPT_HEADERDATA pointer: a map<string, string>*, or
//            nullptr to ignore headers
//
// The line is split at the first ':'; both halves are trimmed and the name
// is converted to ASCII lowercase. A line without ':' (such as the status
// line) is stored under its whole lowercased text with an empty value. A
// repeated name overwrites the earlier value. Lines whose name is empty
// after trimming (the blank line that ends the header block) are skipped.
//
// Always returns size * nmemb so libcurl continues the transfer.
static size_t curl_header_func(void *ptr, size_t size, size_t nmemb, void *userdata)
{
	map<string, string> *out = static_cast<map<string, string> *>(userdata);
	const size_t total = nmemb * size;
	static constexpr char whitespace[] = " \r\n\t\f\v";

	if (nullptr == out)
	{
		return total;
	}

	// Strips leading and trailing whitespace in place
	const auto trim = [](string& text)
	{
		text.erase(0, text.find_first_not_of(whitespace));
		text.erase(text.find_last_not_of(whitespace) + 1);
	};

	const string full(static_cast<const char*>(ptr), total);

	// Split into name and value at the first ':'
	const size_t colon = full.find(':');
	string name = full.substr(0, colon);
	string value;

	if (colon != string::npos)
	{
		value = full.substr(colon + 1);
	}

	trim(name);
	trim(value);

	if (name.empty())
	{
		// Blank separator line: nothing to record
		return total;
	}

	// Header names are case insensitive; normalise to lowercase (ASCII only,
	// independent of the current locale)
	for (char& c : name)
	{
		if ((c >= 'A') && (c <= 'Z'))
		{
			c = static_cast<char>(c - 'A' + 'a');
		}
	}

	// Insert header
	(*out)[name] = value;

	return total;
}

// Creates a libcurl easy handle and stores it in m_handle.
// m_handle is left as whatever curl_easy_init() returned; the callers in
// this file re-check it for nullptr before using it.
void CURL_DL::open_curl()
{
	m_handle = curl_easy_init();
}

// Blocks the calling thread until at least limit.m_ms milliseconds have
// passed since the time recorded for limit.m_name by save_rate_limit().
// Does nothing for an unnamed limit, a non-positive interval, or a name
// that has no recorded time yet.
void CURL_DL::check_rate_limit(const RateLimit& limit)
{
	if (limit.m_name.empty() || !(limit.m_ms > 0))
	{
		return;
	}

	const auto previous = m_limits.find(limit.m_name);
	if (previous == m_limits.end())
	{
		// First request under this name: nothing to wait for
		return;
	}

	const time_point<steady_clock> wait_end = previous->second + duration<int, std::milli>(limit.m_ms);
	const time_point<steady_clock> now = steady_clock::now();

	if (wait_end > now)
	{
		sleep_for(wait_end - now);
	}
}

// Records the current steady-clock time under limit.m_name so that a later
// check_rate_limit() can measure the elapsed interval. Unnamed limits are
// not recorded.
void CURL_DL::save_rate_limit(const RateLimit& limit)
{
	if (limit.m_name.empty())
	{
		return;
	}

	m_limits[limit.m_name] = steady_clock::now();
}

// Constructs the object without a libcurl handle; the handle is created
// lazily by the first download()/post_json() call.
CURL_DL::CURL_DL()
: m_handle(nullptr)
{
	// m_error is the buffer later handed to libcurl via CURLOPT_ERRORBUFFER.
	// Start it as an empty string so get_error() is well defined before the
	// first transfer has run.
	m_error[0] = '\0';
}

// Releases the libcurl easy handle, if one was ever created.
CURL_DL::~CURL_DL()
{
	if (nullptr == m_handle)
	{
		return;
	}

	curl_easy_cleanup(m_handle);
	m_handle = nullptr;
}

// Returns a reference to a single function-local static CURL_DL instance,
// constructed on the first call.
CURL_DL& CURL_DL::get_handle()
{
	static CURL_DL obj;
	return obj;
}

// Convenience overload: GET url into out with no extra request headers, no
// response-header capture and no parameters. Forwards to the full overload.
bool CURL_DL::download(const string& url, ostream* out, RateLimit limit)
{
	return download(url, out, nullptr, nullptr, nullptr, limit);
}

// Convenience overload: GET url into out, sending the given request headers
// (may be nullptr). Response headers are not captured and no parameters are
// passed. Forwards to the full overload.
bool CURL_DL::download(const string& url, ostream* out,
	const vector<string> *headers, RateLimit limit)
{
	return download(url, out, headers, nullptr, nullptr, limit);
}

// Convenience overload: GET url into out, sending the given request headers
// and capturing response headers into out_headers (either may be nullptr).
// No parameters are passed. Forwards to the full overload.
bool CURL_DL::download(const string& url, ostream* out,
	const vector<string> *headers, map<string, string> *out_headers,
	RateLimit limit)
{
	return download(url, out, headers, out_headers, nullptr, limit);
}

bool CURL_DL::download(const string& url, ostream* out,
	const vector<string> *headers, map<string, string> *out_headers,
	const map<string, string> *params, RateLimit limit)
{
	bool result = true;
	CURLcode error;
	struct curl_slist *header_list = nullptr;
	long http_code;

	if (nullptr == m_handle)
	{
		open_curl();
	}

	if (nullptr == m_handle)
	{
		return false;
	}

	curl_easy_reset(m_handle);

	// Enable cookie engine
	curl_easy_setopt(m_handle, CURLOPT_COOKIEFILE, "cookies.txt");

	// Enable error messages
	curl_easy_setopt(m_handle, CURLOPT_ERRORBUFFER, m_error);

	// Set User-Agent
	curl_easy_setopt(m_handle, CURLOPT_USERAGENT, "Internedko Downloader");

	if ((nullptr == params) || (params->count("no-redir") == 0))
	{
		// Set Auto Follow (Max 32 Times)
		curl_easy_setopt(m_handle, CURLOPT_FOLLOWLOCATION, 1);
		curl_easy_setopt(m_handle, CURLOPT_MAXREDIRS, 32);
	}
	else
	{
		curl_easy_setopt(m_handle, CURLOPT_FOLLOWLOCATION, 0);
	}

	// Allow only HTTP and HTTPS (STR after 7.85.0)
	// curl_easy_setopt(m_handle, CURLOPT_PROTOCOLS_STR, "http,https");
	curl_easy_setopt(m_handle, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);

	// Do Not Return "OK" On 4xx And 5xx
	curl_easy_setopt(m_handle, CURLOPT_FAILONERROR, 0);

	// Set Output Func
	curl_easy_setopt(m_handle, CURLOPT_WRITEFUNCTION, curl_write_func);
	curl_easy_setopt(m_handle, CURLOPT_WRITEDATA, out);

	// Set URL
	curl_easy_setopt(m_handle, CURLOPT_URL, url.c_str());

	// Set Get
	curl_easy_setopt(m_handle, CURLOPT_HTTPGET, 1);

	// Set Headers
	if (nullptr != headers)
	{
		for (const string& s : *headers)
		{
			header_list = curl_slist_append(header_list, s.c_str());
		}
	}
	curl_easy_setopt(m_handle, CURLOPT_HTTPHEADER, header_list);

	if (nullptr != out_headers)
	{
		curl_easy_setopt(m_handle, CURLOPT_HEADERFUNCTION, curl_header_func);
		curl_easy_setopt(m_handle, CURLOPT_HEADERDATA, out_headers);
	}

	// Set Parameters
	if (nullptr != params)
	{
		// Username
		if (params->count("user") != 0)
		{
			curl_easy_setopt(m_handle, CURLOPT_USERNAME, (*params).at("user").c_str());
		}

		// Password
		if (params->count("pass") != 0)
		{
			curl_easy_setopt(m_handle, CURLOPT_PASSWORD, (*params).at("pass").c_str());
		}
	}

	check_rate_limit(limit);

	error = curl_easy_perform(m_handle);
	if (error != CURLE_OK)
	{
		result = false;
	}

	error = curl_easy_getinfo(m_handle, CURLINFO_RESPONSE_CODE, &http_code);
	if (error != CURLE_OK)
	{
		result = false;
	}

	if (http_code >= 400)
	{
		result = false;
	}

	save_rate_limit(limit);

	// Cleanup
	if (nullptr != header_list)
	{
		curl_slist_free_all(header_list);
	}
	header_list = nullptr;

	return result;
}

// Convenience overload: POST j to url with no extra request headers.
// Forwards to the overload that takes a header list.
bool CURL_DL::post_json(const string& url, json& j, ostream* out, RateLimit limit)
{
	return post_json(url, j, out, nullptr, limit);
}

// Performs an HTTP(S) POST request with a JSON body.
//
// url     - address to post to
// j       - JSON document; serialised with dump() and sent as the body
// out     - stream that receives the response body; nullptr discards it
// headers - extra request header lines ("Name: value"), appended after the
//           "Content-Type: application/json" header; may be nullptr
// limit   - rate limit honoured before, and recorded after, the request
//
// Returns true when the transfer succeeded. Because CURLOPT_FAILONERROR is
// enabled, an HTTP status of 400 or above makes the transfer fail and this
// function return false (see get_error()).
bool CURL_DL::post_json(const string& url, json& j, ostream* out,
	const vector<string> *headers, RateLimit limit)
{
	bool result = true;
	CURLcode error;
	struct curl_slist *header_list = nullptr;

	if (nullptr == m_handle)
	{
		open_curl();
	}

	if (nullptr == m_handle)
	{
		return false;
	}

	curl_easy_reset(m_handle);

	curl_easy_setopt(m_handle, CURLOPT_COOKIEFILE, "");

	// Clear any message left by an earlier request (libcurl only clears the
	// buffer itself in newer releases)
	m_error[0] = '\0';
	curl_easy_setopt(m_handle, CURLOPT_ERRORBUFFER, m_error);

	// Set User-Agent
	curl_easy_setopt(m_handle, CURLOPT_USERAGENT, "Internedko Archiver");

	// NOTE: curl_easy_setopt() is variadic, so numeric options must be
	// passed as long exactly as the libcurl documentation requires.

	// Set Auto Follow (Max 32 Times)
	curl_easy_setopt(m_handle, CURLOPT_FOLLOWLOCATION, 1L);
	curl_easy_setopt(m_handle, CURLOPT_MAXREDIRS, 32L);

	// Allow only HTTP and HTTPS (STR after 7.85.0)
	// curl_easy_setopt(m_handle, CURLOPT_PROTOCOLS_STR, "http,https");
	curl_easy_setopt(m_handle, CURLOPT_PROTOCOLS,
		static_cast<long>(CURLPROTO_HTTP | CURLPROTO_HTTPS));

	// Do Not Return "OK" On 4xx And 5xx
	curl_easy_setopt(m_handle, CURLOPT_FAILONERROR, 1L);

	// Set Output Func
	curl_easy_setopt(m_handle, CURLOPT_WRITEFUNCTION, curl_write_func);
	curl_easy_setopt(m_handle, CURLOPT_WRITEDATA, out);

	// Set URL
	curl_easy_setopt(m_handle, CURLOPT_URL, url.c_str());

	// Set Post
	curl_easy_setopt(m_handle, CURLOPT_POST, 1L);

	// Serialise before the header list is allocated so that an exception
	// from dump() cannot leak the list
	const string payload = j.dump();

	// Set Headers
	header_list = curl_slist_append(header_list, "Content-Type: application/json");
	if (nullptr != headers)
	{
		for (const string& s : *headers)
		{
			header_list = curl_slist_append(header_list, s.c_str());
		}
	}
	curl_easy_setopt(m_handle, CURLOPT_HTTPHEADER, header_list);

	// Set Data (libcurl takes its own copy of the body)
	curl_easy_setopt(m_handle, CURLOPT_COPYPOSTFIELDS, payload.c_str());

	check_rate_limit(limit);

	error = curl_easy_perform(m_handle);
	if (error != CURLE_OK)
	{
		result = false;
	}

	save_rate_limit(limit);

	// Cleanup
	curl_slist_free_all(header_list);
	header_list = nullptr;

	return result;
}

// Returns the contents of m_error as a string. m_error is the buffer that
// download() and post_json() register with libcurl via CURLOPT_ERRORBUFFER.
string CURL_DL::get_error() const
{
	return m_error;
}

// Percent-encodes a string for use inside a URL.
//
// ASCII letters and digits are copied unchanged; every other byte is
// written as '%' followed by two uppercase hexadecimal digits. This
// includes control characters and the bytes of multi-byte UTF-8 sequences.
// Punctuation such as '-', '.', '_' and '~' is encoded as well, which
// keeps the output for printable ASCII input unchanged from earlier
// versions of this function.
//
// input - raw text
//
// Returns the encoded text.
string CURL_DL::url_encode(const string& input)
{
	static const char hex_digits[] = "0123456789ABCDEF";
	string result;

	result.reserve(input.size());

	for (const char c : input)
	{
		// Work on the unsigned value so bytes >= 0x80 are handled correctly
		// where char is signed
		const unsigned char byte = static_cast<unsigned char>(c);
		const bool is_alnum =
			((byte >= '0') && (byte <= '9')) ||
			((byte >= 'A') && (byte <= 'Z')) ||
			((byte >= 'a') && (byte <= 'z'));

		if (is_alnum)
		{
			result += c;
		}
		else
		{
			result += '%';
			result += hex_digits[byte >> 4];
			result += hex_digits[byte & 0x0F];
		}
	}

	return result;
}

// Computes the target of a redirect from the requested URL and the value of
// a Location header.
//
// url      - URL that was requested; its query and fragment are ignored
// location - Location header value
//
// Returns:
//   - location starting with "//" (network-path reference): the scheme of
//     url followed by location; if url has no "://", location unchanged
//   - location starting with a single "/": scheme and host of url followed
//     by location
//   - anything else (absolute URL, relative path, empty): location unchanged
string CURL_DL::calc_redir(string url, const string& location)
{
	// Drop query and fragment so they cannot be mistaken for part of the
	// authority or end up in the middle of the result
	size_t pos = url.find_first_of("?#");
	if (string::npos != pos)
	{
		url.erase(pos);
	}

	if (location.empty() || (location[0] != '/'))
	{
		return location;
	}

	const size_t scheme_end = url.find("://");

	if (location.compare(0, 2, "//") == 0)
	{
		// "//host/path" names a new host and only inherits the scheme
		if (string::npos == scheme_end)
		{
			return location;
		}

		return url.substr(0, scheme_end + 1) + location;
	}

	// Find the first '/' after the authority
	if (string::npos == scheme_end)
	{
		pos = url.find('/');
	}
	else
	{
		pos = url.find('/', scheme_end + 3);
	}

	// Keep only scheme and host of the base URL
	if (string::npos != pos)
	{
		url.erase(pos);
	}

	url += location;
	return url;
}