diff --git a/codegen.sh b/codegen.sh new file mode 100755 index 0000000..6094d51 --- /dev/null +++ b/codegen.sh @@ -0,0 +1,6 @@ +#!/bin/sh +# +# To make it easy to use with GNU Parallel, e.g., +# parallel codegen.sh ::: *.mp3 +# +echoprint-codegen -h "$1" > "$1.json" diff --git a/src/Codegen.cxx b/src/Codegen.cxx index b391b38..df9a62c 100644 --- a/src/Codegen.cxx +++ b/src/Codegen.cxx @@ -23,6 +23,10 @@ using std::string; using std::vector; Codegen::Codegen(const float* pcm, unsigned int numSamples, int start_offset) { + for (int i = 0; i < 2; ++i) { + is_code_string_cached[i] = false; + } + if (Params::AudioStreamInput::MaxSamples < (uint)numSamples) throw std::runtime_error("File was too big\n"); @@ -38,7 +42,11 @@ Codegen::Codegen(const float* pcm, unsigned int numSamples, int start_offset) { Fingerprint *pFingerprint = new Fingerprint(pSubbandAnalysis, start_offset); pFingerprint->Compute(); +#if defined(UNHASHED_CODES) + _CodeString = createCodeStringJSON(pFingerprint->getCodes()); +#else _CodeString = createCodeString(pFingerprint->getCodes()); +#endif _NumCodes = pFingerprint->getCodes().size(); delete pFingerprint; @@ -63,6 +71,24 @@ string Codegen::createCodeString(vector vCodes) { return compress(codestream.str()); } +string Codegen::createCodeStringJSON(vector vCodes) { + std::ostringstream codestream; + codestream << "["; + for (uint i = 0; i < vCodes.size(); i++) { + int hash = vCodes[i].code; + // codestream << std::setw(5) << hash; + codestream << "[" << vCodes[i].frame << ", " + << ((hash >> 20) & 7) << ", " + << ((hash >> 10) & 1023) << ", " + << ((hash >> 0) & 1023) + << "]"; + if (i < vCodes.size()-1) { + codestream << ", "; + } + } + codestream << "]"; + return codestream.str(); +} string Codegen::compress(const string& s) { long max_compressed_length = s.size()*2; @@ -89,3 +115,24 @@ string Codegen::compress(const string& s) { delete [] compressed; return encoded; } + +std::string Codegen::getCodeString(bool human_readable) { + const uint n = human_readable; + if (!is_code_string_cached[n]) { + is_code_string_cached[n] = true; + if (human_readable) { + if (_CodeString.size() > 0) { + code_string_cache[n] = _CodeString; + } else { + code_string_cache[n] = "[]"; + } + } else { + if (_CodeString.size() > 0) { + code_string_cache[n] = '"' + compress(_CodeString) + '"'; + } else { + code_string_cache[n] = "\"\""; + } + } + } + return code_string_cache[n]; +} diff --git a/src/Codegen.h b/src/Codegen.h index 5923d51..4909ade 100644 --- a/src/Codegen.h +++ b/src/Codegen.h @@ -33,16 +33,20 @@ class CODEGEN_API Codegen { public: Codegen(const float* pcm, unsigned int numSamples, int start_offset); - std::string getCodeString(){return _CodeString;} + std::string getCodeString(bool human_readable); + int getNumCodes(){return _NumCodes;} static double getVersion() { return ECHOPRINT_VERSION; } private: Fingerprint* computeFingerprint(SubbandAnalysis *pSubbandAnalysis, int start_offset); std::string createCodeString(std::vector vCodes); + std::string createCodeStringJSON(std::vector vCodes); std::string compress(const std::string& s); std::string _CodeString; int _NumCodes; + bool is_code_string_cached[2]; + std::string code_string_cache[2]; }; #endif diff --git a/src/Fingerprint.cxx b/src/Fingerprint.cxx index 7457170..1ddd0be 100644 --- a/src/Fingerprint.cxx +++ b/src/Fingerprint.cxx @@ -12,6 +12,8 @@ #include "win_funcs.h" #endif +#define SATURATE(var, val) if ((var) > (val)) (var) = (val); + unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed ) { // MurmurHash2, by Austin Appleby http://sites.google.com/site/murmurhash/ // m and r are constants set by austin @@ -182,13 +184,18 @@ uint Fingerprint::quantized_time_for_frame_absolute(uint frame) { void Fingerprint::Compute() { uint actual_codes = 0; +#if !defined(UNHASHED_CODES) unsigned char hash_material[5]; for(uint i=0;i<5;i++) hash_material[i] = 0; +#endif uint * onset_counter_for_band; matrix_u out; uint onset_count = adaptiveOnsets(345, out, onset_counter_for_band); _Codes.resize(onset_count*6); +#if defined(UNHASHED_CODES) + assert(SUBBANDS <= 8); +#endif for(unsigned char band=0;band2) { for(uint onset=0;onset +#include +#include +#ifndef _WIN32 + #include + #include +#endif +#include +#include + +#include "AudioStreamInput.h" +#include "Metadata.h" +#include "Codegen.h" +#include +#include "functions.h" + +using namespace std; + + +// deal with quotes etc in json +std::string escape(const string& value) { + std::string s(value); + std::string out = ""; + out.reserve(s.size()); + for (size_t i = 0; i < s.size(); i++) { + char c = s[i]; + if ((unsigned char)c < 31) + continue; + + switch (c) { + case '"' : out += "\\\""; break; + case '\\': out += "\\\\"; break; + case '\b': out += "\\b" ; break; + case '\f': out += "\\f" ; break; + case '\n': out += "\\n" ; break; + case '\r': out += "\\r" ; break; + case '\t': out += "\\t" ; break; + // case '/' : out += "\\/" ; break; // Unnecessary? + default: + out += c; + // TODO: do something with unicode? + } + } + + return out; +} + + +codegen_response_t *codegen_file(char* filename, int start_offset, int duration, int tag) { + // Given a filename, perform a codegen on it and get the response + // This is called by a thread + double t1 = now(); + codegen_response_t *response = (codegen_response_t *)malloc(sizeof(codegen_response_t)); + response->error = NULL; + response->codegen = NULL; + + auto_ptr pAudio(new FfmpegStreamInput()); + pAudio->ProcessFile(filename, start_offset, duration); + + if (pAudio.get() == NULL) { // Unable to decode! + char* output = (char*) malloc(16384); + sprintf(output,"{\"error\":\"could not create decoder\", \"tag\":%d, \"metadata\":{\"filename\":\"%s\"}}", + tag, + escape(filename).c_str()); + response->error = output; + return response; + } + + int numSamples = pAudio->getNumSamples(); + + if (numSamples < 1) { + char* output = (char*) malloc(16384); + sprintf(output,"{\"error\":\"could not decode\", \"tag\":%d, \"metadata\":{\"filename\":\"%s\"}}", + tag, + escape(filename).c_str()); + response->error = output; + return response; + } + t1 = now() - t1; + + double t2 = now(); + Codegen *pCodegen = new Codegen(pAudio->getSamples(), numSamples, start_offset); + t2 = now() - t2; + + response->t1 = t1; + response->t2 = t2; + response->numSamples = numSamples; + response->codegen = pCodegen; + response->start_offset = start_offset; + response->duration = duration; + response->tag = tag; + response->filename = filename; + + return response; +} + +char *make_json_string(codegen_response_t* response, bool human_readable_code) { + + if (response->error != NULL) { + return response->error; + } + + // Get the ID3 tag information. + auto_ptr pMetadata(new Metadata(response->filename)); + + // preamble + codelen + char* output = (char*) malloc(sizeof(char)*(16384 + strlen(response->codegen->getCodeString(human_readable_code).c_str()) )); + + sprintf(output,"{\"metadata\":{\"artist\":\"%s\", \"release\":\"%s\", \"title\":\"%s\", \"genre\":\"%s\", \"bitrate\":%d," + "\"sample_rate\":%d, \"duration\":%d, \"filename\":\"%s\", \"samples_decoded\":%d, \"given_duration\":%d," + " \"start_offset\":%d, \"version\":%2.2f, \"codegen_time\":%2.6f, \"decode_time\":%2.6f}, \"code_count\":%d," + " \"code\":%s, \"tag\":%d}", + escape(pMetadata->Artist()).c_str(), + escape(pMetadata->Album()).c_str(), + escape(pMetadata->Title()).c_str(), + escape(pMetadata->Genre()).c_str(), + pMetadata->Bitrate(), + pMetadata->SampleRate(), + pMetadata->Seconds(), + escape(response->filename).c_str(), + response->numSamples, + response->duration, + response->start_offset, + response->codegen->getVersion(), + response->t2, + response->t1, + response->codegen->getNumCodes(), + response->codegen->getCodeString(human_readable_code).c_str(), + response->tag + ); + return output; +} diff --git a/src/functions.h b/src/functions.h new file mode 100644 index 0000000..d252d16 --- /dev/null +++ b/src/functions.h @@ -0,0 +1,32 @@ +#include +#include +#include +#ifndef _WIN32 + #include + #include +#endif +#include +#include + +#include "AudioStreamInput.h" +#include "Metadata.h" +#include "Codegen.h" +#include + +// The response from the codegen. Contains all the fields necessary +// to create a json string. +typedef struct { + char *error; + char *filename; + int start_offset; + int duration; + int tag; + double t1; + double t2; + int numSamples; + Codegen* codegen; +} codegen_response_t; + +codegen_response_t *codegen_file(char*, int, int, int); +std::string escape(const string& value); +char *make_json_string(codegen_response_t* response, bool human_readable_code); diff --git a/src/main.cxx b/src/main.cxx index 8b2b389..7d2dc7e 100644 --- a/src/main.cxx +++ b/src/main.cxx @@ -18,24 +18,12 @@ #include "Metadata.h" #include "Codegen.h" #include +#include "functions.h" + #define MAX_FILES 200000 using namespace std; -// The response from the codegen. Contains all the fields necessary -// to create a json string. -typedef struct { - char *error; - char *filename; - int start_offset; - int duration; - int tag; - double t1; - double t2; - int numSamples; - Codegen* codegen; -} codegen_response_t; - // Struct to pass to the worker threads typedef struct { char *filename; @@ -80,81 +68,8 @@ int getNumCores() { #endif } -// deal with quotes etc in json -std::string escape(const string& value) { - std::string s(value); - std::string out = ""; - out.reserve(s.size()); - for (size_t i = 0; i < s.size(); i++) { - char c = s[i]; - if ((unsigned char)c < 31) - continue; - - switch (c) { - case '"' : out += "\\\""; break; - case '\\': out += "\\\\"; break; - case '\b': out += "\\b" ; break; - case '\f': out += "\\f" ; break; - case '\n': out += "\\n" ; break; - case '\r': out += "\\r" ; break; - case '\t': out += "\\t" ; break; - // case '/' : out += "\\/" ; break; // Unnecessary? - default: - out += c; - // TODO: do something with unicode? - } - } - - return out; -} - -codegen_response_t *codegen_file(char* filename, int start_offset, int duration, int tag) { - // Given a filename, perform a codegen on it and get the response - // This is called by a thread - double t1 = now(); - codegen_response_t *response = (codegen_response_t *)malloc(sizeof(codegen_response_t)); - response->error = NULL; - response->codegen = NULL; - - auto_ptr pAudio(new FfmpegStreamInput()); - pAudio->ProcessFile(filename, start_offset, duration); - - if (pAudio.get() == NULL) { // Unable to decode! - char* output = (char*) malloc(16384); - sprintf(output,"{\"error\":\"could not create decoder\", \"tag\":%d, \"metadata\":{\"filename\":\"%s\"}}", - tag, - escape(filename).c_str()); - response->error = output; - return response; - } - int numSamples = pAudio->getNumSamples(); - if (numSamples < 1) { - char* output = (char*) malloc(16384); - sprintf(output,"{\"error\":\"could not decode\", \"tag\":%d, \"metadata\":{\"filename\":\"%s\"}}", - tag, - escape(filename).c_str()); - response->error = output; - return response; - } - t1 = now() - t1; - - double t2 = now(); - Codegen *pCodegen = new Codegen(pAudio->getSamples(), numSamples, start_offset); - t2 = now() - t2; - - response->t1 = t1; - response->t2 = t2; - response->numSamples = numSamples; - response->codegen = pCodegen; - response->start_offset = start_offset; - response->duration = duration; - response->tag = tag; - response->filename = filename; - - return response; -} void *threaded_codegen_file(void *parm) { @@ -172,7 +87,7 @@ void print_json_to_screen(char* output, int count, int done) { if(done==1 && count>1) { printf("[\n%s,\n", output); } else if(done==1 && count == 1) { - printf("[\n%s\n]\n", output); + printf("%s\n", output); } else if(done == count) { printf("%s\n]\n", output); } else { @@ -180,46 +95,39 @@ void print_json_to_screen(char* output, int count, int done) { } } -char *make_json_string(codegen_response_t* response) { - - if (response->error != NULL) { - return response->error; +// Return true if has specified flag, false otherwise. Remove specified flags. +bool extract_flag(int* p_argc, char*** p_argv, const char* flag) { + int argc = *p_argc; + char** const argv = *p_argv; + for (int i = 1; i < argc; ++i) { + if (strcmp(flag, argv[i]) == 0) { + for (int k = i+1; k < argc; ++k) { + *(*p_argv+k-1) = *(*p_argv+k); + } + --argc; + } } - - // Get the ID3 tag information. - auto_ptr pMetadata(new Metadata(response->filename)); - - // preamble + codelen - char* output = (char*) malloc(sizeof(char)*(16384 + strlen(response->codegen->getCodeString().c_str()) )); - - sprintf(output,"{\"metadata\":{\"artist\":\"%s\", \"release\":\"%s\", \"title\":\"%s\", \"genre\":\"%s\", \"bitrate\":%d," - "\"sample_rate\":%d, \"duration\":%d, \"filename\":\"%s\", \"samples_decoded\":%d, \"given_duration\":%d," - " \"start_offset\":%d, \"version\":%2.2f, \"codegen_time\":%2.6f, \"decode_time\":%2.6f}, \"code_count\":%d," - " \"code\":\"%s\", \"tag\":%d}", - escape(pMetadata->Artist()).c_str(), - escape(pMetadata->Album()).c_str(), - escape(pMetadata->Title()).c_str(), - escape(pMetadata->Genre()).c_str(), - pMetadata->Bitrate(), - pMetadata->SampleRate(), - pMetadata->Seconds(), - escape(response->filename).c_str(), - response->numSamples, - response->duration, - response->start_offset, - response->codegen->getVersion(), - response->t2, - response->t1, - response->codegen->getNumCodes(), - response->codegen->getCodeString().c_str(), - response->tag - ); - return output; -} + const bool changed = (*p_argc != argc); + *p_argc = argc; + return changed; + } + +bool take_human_readable_flag(int* p_argc, char*** p_argv) { + return extract_flag(p_argc, p_argv, "-h"); + } int main(int argc, char** argv) { + const bool human_readable_code = take_human_readable_flag(&argc, &argv); + if (argc < 2) { - fprintf(stderr, "Usage: %s [ filename | -s ] [seconds_start] [seconds_duration] [< file_list (if -s is set)]\n", argv[0]); + const char left_margin[] = " "; + fprintf(stderr, + "Usage:\n" + "%s%s [ filename | -s ] [seconds_start] [seconds_duration] " + "[< file_list (if -s is set)]\n", + left_margin, argv[0]); + fputs("OPTIONS\n", stderr); + fprintf(stderr, "%s-h\tHuman-readable code (in contrast to the default""base64 encoded zlib compressed code)\n", left_margin); exit(-1); } @@ -254,7 +162,8 @@ int main(int argc, char** argv) { // Threading doesn't work in windows yet. for(int i=0;icodegen) { delete response->codegen; @@ -303,7 +212,7 @@ int main(int argc, char** argv) { parm[i]->done = 0; done++; codegen_response_t *response = (codegen_response_t*)parm[i]->response; - char *json = make_json_string(response); + char *json = make_json_string(response, human_readable_code); print_json_to_screen(json, count, done); if (response->codegen) { delete response->codegen; @@ -345,3 +254,5 @@ int main(int argc, char** argv) { } } + +