The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.

function tr(fun) {
    return (""+fun).replace(/    /g, "");
}

var doc = {_id:"_design/base", language:"javascript", views:{}};

doc.views.audio = {
    map:tr(function (doc) {
        if(doc.data.audio.length > 0)
            emit(doc._id, doc.data.audio);
    }),
};

doc.views.image = {
    map:tr(function (doc) {
        if(doc.data.image.length > 0)
            emit(doc._id, doc.data.image);
    }),
};

doc.views.pdf = {
    map:tr(function (doc) {
        if(doc.data.pdf.length > 0)
            emit(doc._id, doc.data.pdf);
    }),
};

doc.views.lists = {
    map:tr(function (doc) {
        var test, qq = [];
        if(doc.data.links.length) {
            test = doc.data.links;
            for (i = 0; i < test.length; i++) {
                if(test[i].match("List_of_")) {
                    qq[qq.length] = test[i];
                }
            }
        }
        if(qq.length)
            emit(qq, doc._id);
    }),
};

doc.views.files = {
    map: tr(function (doc) {
        var test, qq = [];
        if(doc.data.links.length) {
            test = doc.data.links;
            for (i = 0; i < test.length; i++) {
                if(test[i].match("File:")) {
                    qq[qq.length] = test[i];
                }
            }
        }
        if(qq.length)
            emit(qq, doc._id);
    }),
};

doc.views.tags = {
    map: tr(function (doc) {
        if(doc.data.tags.length > 0)
            emit(doc._id, doc.data.tags);
    }),
    reduce: tr(function (doc) {
        if(doc.data) {
            var prefix = doc.data.tags;
            if(prefix)
                emit(prefix, doc._id);
        }
    }),
};

doc.views.tagcount = {
    map: tr(function (doc) {
        if (doc.data.tags.length)
            emit(doc._id, doc.data.tags.length);
    }),
    reduce: tr(function(keys, values) {
        return sum(values);
    }),
};

doc.views.instances = {
    map: tr(function (doc) {
        if (doc && doc.data &&
            doc.data.instances[0] &&
            doc.data.instances[0].length > 0)
            emit(doc.data.instances, doc._id);
    }),
    reduce: tr(function(doc) {
        if(doc.data) {
            var prefix = doc.data.instances.length;
            if(prefix)
                emit(prefix, doc._id);
        }
    }),
};

doc.views.linkcount = {
    map: tr(function (doc) {
        if (doc.data.linknr)
            emit(doc._id, doc.data.linknr);
    }),
    reduce: tr(function(keys, values) {
        return sum(values);
    }),
};

doc.views.linkcountbig = {
    map: tr(function (doc) {
        if (doc.data.linknr > 1000)
            emit(doc._id, doc.data.linknr);
    }),
    reduce: tr(function(keys, values) {
        return sum(values);
    }),
};

doc.views.articles = {
    map: tr(function(doc) {
        var mediaratio = 0;
        if (doc.data) {
            var links = {};
            if(doc.data.pdf) {
                links.pdf = [];
                for(var u in doc.data.pdf) {
                    links.pdf.push(doc.data.pdf[u]);
                }
            }
            if(doc.data.image) {
                links.image = [];
                for(var u in doc.data.image) {
                    mediaratio++;
                    links.image.push("http://" + doc.data.image[u]);
                }
            }
            if(doc.data.headings) {
                links.headings = doc.data.headings;
            }
            if(doc.data.categories) {
                links.categories = doc.data.categories;
            }
            if(doc.data.members) {
                links.members = doc.data.members;
            }
            if(doc.data.cat) {
                links.cat = doc.data.cat;
            }
            if(doc.data.list) {
                links.list = doc.data.list;
            }
            if(doc.data.src) {
                links.src = doc.data.src;
            }
            if(doc.data.related) {
                links.related = doc.data.related;
            }
            if(doc.data.audio) {
                links.audio = [];
                for(var u in doc.data.audio) {
                    if(doc.data.audio[u].match(/^\/\/upload/)) {
                        mediaratio++;
                        links.audio.push("http:" + doc.data.audio[u]);
                    }
                }
            }
            emit(doc.data.instances, [doc._id, {
                media:mediaratio,
                instance:doc.data.members.length,
                tags:doc.data.tags.length,
                spawn:doc.data.linknr,
            }, doc.data.audio, links]);
        }
    }),
};


doc.views.scores = {
    map: tr(function (doc) {

var FuzzySet=(function(){return function(arr,useLevenshtein,gramSizeLower,gramSizeUpper){
    var fuzzyset = {
        version: '0.0.1c'
    };

    // default options
    arr = arr || [];
    fuzzyset.gramSizeLower = gramSizeLower || 2;
    fuzzyset.gramSizeUpper = gramSizeUpper || 3;
    fuzzyset.useLevenshtein = useLevenshtein || true;

    // define all the object functions and attributes
    fuzzyset.exactSet = {}
    fuzzyset.matchDict = {};
    fuzzyset.items = {};

    // helper functions
    var levenshtein = function(str1, str2) {
        var current = [], prev, value;

        for (var i = 0; i <= str2.length; i++)
            for (var j = 0; j <= str1.length; j++) {
            if (i && j)
                if (str1.charAt(j - 1) === str2.charAt(i - 1))
                value = prev;
                else
                value = Math.min(current[j], current[j - 1], prev) + 1;
            else
                value = i + j;

            prev = current[j];
            current[j] = value;
            }

        return current.pop();
    };

    // return an edit distance from 0 to 1
    var _distance = function(str1, str2) {
        if (str1 == null && str2 == null) throw 'Trying to compare two null values'
        if (str1 == null || str2 == null) return 0;
        str1 = String(str1); str2 = String(str2);

        var distance = levenshtein(str1, str2);
        if (str1.length > str2.length) {
            return 1 - distance / str1.length;
        } else {
            return 1 - distance / str2.length;
        }
    };
    var _nonWordRe = /[^\w, ]+/;

    var _iterateGrams = function(value, gramSize) {
        gramSize = gramSize || 2;
        var simplified = '-' + value.toLowerCase().replace(_nonWordRe, '') + '-',
            lenDiff = gramSize - simplified.length,
            results = [];
        if (lenDiff > 0) {
            for (var i = 0; i < lenDiff; ++i) {
                value += '-';
            }
        }
        for (var i = 0; i < simplified.length - gramSize + 1; ++i) {
            results.push(simplified.slice(i, i + gramSize))
        }
        return results;
    };

    var _gramCounter = function(value, gramSize) {
        gramSize = gramSize || 2;
        var result = {},
            grams = _iterateGrams(value, gramSize),
            i = 0;
        for (i; i < grams.length; ++i) {
            if (grams[i] in result) {
                result[grams[i]] += 1;
            } else {
                result[grams[i]] = 1;
            }
        }
        return result;
    };

    // the main functions
    fuzzyset.get = function(value, defaultValue) {
        var result = this._get(value);
        if (!result && defaultValue) {
            return defaultValue;
        }
        return result;
    };

    fuzzyset._get = function(value) {
        var normalizedValue = this._normalizeStr(value),
            result = this.exactSet[normalizedValue];
        if (result) {
            return [[1, result]];
        }
        var results = [];
        for (var gramSize = this.gramSizeUpper; gramSize > this.gramSizeLower; --gramSize) {
            results = this.__get(value, gramSize);
            if (results) {
                return results;
            }
        }
        return null;
    };

    fuzzyset.__get = function(value, gramSize) {
        var normalizedValue = this._normalizeStr(value),
            matches = {},
            gramCounts = _gramCounter(normalizedValue, gramSize),
            items = this.items[gramSize],
            sumOfSquareGramCounts = 0,
            gram,
            gramCount,
            i,
            index,
            otherGramCount;

        for (gram in gramCounts) {
            gramCount = gramCounts[gram];
            sumOfSquareGramCounts += Math.pow(gramCount, 2);
            if (gram in this.matchDict) {
                for (i = 0; i < this.matchDict[gram].length; ++i) {
                    index = this.matchDict[gram][i][0];
                    otherGramCount = this.matchDict[gram][i][1];
                    if (index in matches) {
                        matches[index] += gramCount * otherGramCount;
                    } else {
                        matches[index] = gramCount * otherGramCount;
                    }
                }
            }
        }

        function isEmptyObject(obj) {
            for(var prop in obj) {
                if(obj.hasOwnProperty(prop))
                    return false;
            }
            return true;
        }

        if (isEmptyObject(matches)) {
            return null;
        }

        var vectorNormal = Math.sqrt(sumOfSquareGramCounts),
            results = [],
            matchScore;
        // build a results list of [score, str]
        for (var matchIndex in matches) {
            matchScore = matches[matchIndex];
            results.push([matchScore / (vectorNormal * items[matchIndex][0]), items[matchIndex][1]]);
        }
        var sortDescending = function(a, b) {
            if (a[0] < b[0]) {
                return 1;
            } else if (a[0] > b[0]) {
                return -1;
            } else {
                return 0;
            }
        };
        results.sort(sortDescending);
        if (this.useLevenshtein) {
            var newResults = [],
                endIndex = results.length;
            for (var i = 0; i < endIndex; ++i) {
                newResults.push([_distance(results[i][1], normalizedValue), results[i][1]]);
            }
            results = newResults;
            results.sort(sortDescending);
        }
        var newResults = [];
        for (var i = 0; i < results.length; ++i) {
            if (results[i][0] == results[0][0]) {
                newResults.push([results[i][0], this.exactSet[results[i][1]]]);
            }
        }
        return newResults;
    };

    fuzzyset.add = function(value) {
        var normalizedValue = this._normalizeStr(value);
        if (normalizedValue in this.exactSet) {
            return false;
        }

        var i = this.gramSizeLower;
        for (i; i < this.gramSizeUpper + 1; ++i) {
            this._add(value, i);
        }
    };

    fuzzyset._add = function(value, gramSize) {
        var normalizedValue = this._normalizeStr(value),
            items = this.items[gramSize] || [],
            index = items.length;

        items.push(0);
        var gramCounts = _gramCounter(normalizedValue, gramSize),
            sumOfSquareGramCounts = 0,
            gram, gramCount;
        for (var gram in gramCounts) {
            gramCount = gramCounts[gram];
            sumOfSquareGramCounts += Math.pow(gramCount, 2);
            if (gram in this.matchDict) {
                this.matchDict[gram].push([index, gramCount]);
            } else {
                this.matchDict[gram] = [[index, gramCount]];
            }
        }
        var vectorNormal = Math.sqrt(sumOfSquareGramCounts);
        items[index] = [vectorNormal, normalizedValue];
        this.items[gramSize] = items;
        this.exactSet[normalizedValue] = value;
    };

    fuzzyset._normalizeStr = function(str) {
        if (Object.prototype.toString.call(str) !== '[object String]') throw 'Must use a string as argument to FuzzySet functions'
        return str.toLowerCase();
    };

    // return length of items in set
    fuzzyset.length = function() {
        var count = 0,
            prop;
        for (prop in this.exactSet) {
            if (this.exactSet.hasOwnProperty(prop)) {
                count += 1;
            }
        }
        return count;
    };

    // return is set is empty
    fuzzyset.isEmpty = function() {
        for (var prop in this.exactSet) {
            if (this.exactSet.hasOwnProperty(prop)) {
                return false;
            }
        }
        return true;
    };

    // return list of values loaded into set
    fuzzyset.values = function() {
        var values = [],
            prop;
        for (prop in this.exactSet) {
            if (this.exactSet.hasOwnProperty(prop)) {
                values.push(this.exactSet[prop])
            }
        }
        return values;
    };


    // initialization
    var i = fuzzyset.gramSizeLower;
    for (i; i < fuzzyset.gramSizeUpper + 1; ++i) {
        fuzzyset.items[i] = [];
    }
    // add all the items to the set
    for (i = 0; i < arr.length; ++i) {
        fuzzyset.add(arr[i]);
    }

    return fuzzyset;
};

})();

var density = function (arr) {
    var count = {}, buf = [],
        res = [], max = 0;
    arr.forEach(function (k) {
        count[k] = (count[k] || 0) + 1;
    });
    Object.keys(count).forEach(function (k) {
        max = Math.max(max, count[k]);
        buf.push([count[k],k]);
    });
    buf.sort(function(a, b) {
            if (a[0] < b[0])      return  1;
            else if (a[0] > b[0]) return -1;
            else                  return  0;
    });
    var blen = buf.length;
    for (var c = 0 ; c < max ; c++)
        for (var i = 0 ; i < blen ; i++)
            if(buf[i][0]-- > 0)
                res.push(buf[i][1]);
    return res;
};

        if (doc && doc.data &&
            doc.data.instances[0] &&
            doc.data.instances[0].length > 0) {
            var dense = density(doc.data.instances);
            emit(dense, FuzzySet(dense).get(doc._id) || [[0, doc._id]]);
        }
    }),
    reduce: tr(function(keys, values, rereduce) {
        var count = {};
        (keys || []).forEach(function (key) {
            key[0].forEach(function (k) {
                count[k] = (count[k] || 0) + 1;
            });
        });
        if (rereduce) {
            values.forEach(function (value) {
                value.forEach(function (v) {
                    v.forEach(function (k) {
                        count[k] = (count[k] || 0) + 1;
                    });
                });
            });
        }
        Object.keys(count).forEach(function (k) {
            if (count[k] < 2) delete count[k];
        });
        return count;
    }),
};

console.log(JSON.stringify(doc));