[Neo4j] Index Performance(Was: Event framework has landed)
Atle Prange
atle.prange at gmail.com
Wed Jun 23 21:19:01 CEST 2010
Hm, I'll have to fix that...
Any thoughts on a Trie implementation? Would it be able to compete?
atle
On Wed, Jun 23, 2010 at 11:04 AM, Mattias Persson
<mattias at neotechnology.com> wrote:
> I think the Lucene test is flawed since it never returns any results in
> the lookup method. That's why it's so fast :)
>
> 2010/6/22 Atle Prange <atle.prange at gmail.com>
>
>> Started a new thread since the old one got a bit long. If you want to
>> catch up, read the thread "The event framework has landed".
>>
>> Okay, I changed the tests to reflect a bit more realistic usage.
>>
>> The tests first insert 1M entries to create a base of data. After
>> that, they read and write 1000 entries, a thousand times over.
>>
>> BabuDB:
>> First million: 4s
>> 1000 inserts, 4ms
>> 1000 lookups: 30ms
>>
>> Lucene:
>> First million entries took 1 ms. This shows the async behavior of Lucene.
>> 1000 inserts: about 4 seconds (!)
>> 1000 lookups: under 1 ms.
>>
>> (All numbers are very approximate and should only be seen
>> as relative performance indicators.)
>>
>>
>> This is what I expected. Lucene is optimized towards collecting large
>> amounts of data batchwise and then handling many searches. (Correct me
>> if I am wrong.)
>> BabuDB "just writes" data and "just reads" it later on.
>>
>> The test can of course be flawed.
>>
>> BabuDB test:
>>
>>
>>
>> package org.ogrm.test;
>>
>> import java.io.File;
>> import java.io.IOException;
>> import java.util.Iterator;
>> import java.util.Map.Entry;
>>
>> import org.apache.commons.lang.math.RandomUtils;
>> import org.xtreemfs.babudb.BabuDB;
>> import org.xtreemfs.babudb.BabuDBException;
>> import org.xtreemfs.babudb.BabuDBFactory;
>> import org.xtreemfs.babudb.config.BabuDBConfig;
>> import org.xtreemfs.babudb.log.DiskLogger.SyncMode;
>> import org.xtreemfs.babudb.lsmdb.BabuDBInsertGroup;
>> import org.xtreemfs.babudb.lsmdb.Database;
>>
>> /**
>>  * Micro-benchmark that uses BabuDB as a simple key/value index.
>>  *
>>  * <p>Keys have the form {@code key|value|id} and the stored value is the id
>>  * encoded as 8 big-endian bytes. A lookup is a prefix scan on
>>  * {@code key|value|}, so it returns every id stored under that value.
>>  *
>>  * <p>The benchmark first inserts one million entries and then runs 1000
>>  * rounds of "1000 lookups followed by 1000 inserts", printing the elapsed
>>  * wall-clock milliseconds of every step.
>>  */
>> public class TestBabuDb {
>>
>>     private static Database db;
>>
>>     /**
>>      * Runs the benchmark against a fresh database in the "babudb" directory.
>>      *
>>      * @param args ignored
>>      * @throws Exception if BabuDB reports an error
>>      */
>>     public static void main( String[] args ) throws Exception {
>>         deleteFileOrDirectory( new File( "babudb" ) );
>>         BabuDB babuDb = BabuDBFactory.createBabuDB( new BabuDBConfig(
>>                 "babudb/db", "babudb/log", 1, 1024 * 1024 * 20,
>>                 10, SyncMode.ASYNC, 0, 0, false, 512,
>>                 1024 * 1024 * 100 ) );
>>         // Shut everything down even when the benchmark fails half-way.
>>         try {
>>             db = babuDb.getDatabaseManager().createDatabase( "test", 1 );
>>             try {
>>                 int init = 1000000;
>>                 int num = 1000;
>>                 int base = 0;
>>                 int iterations = 1000;
>>                 insert( init, base );
>>                 base = init;
>>                 for (int i = 0; i < iterations; i++) {
>>                     lookup( num, base );
>>                     insert( num, base );
>>                     base = base + num;
>>                 }
>>             } finally {
>>                 db.shutdown();
>>             }
>>         } finally {
>>             babuDb.shutdown();
>>         }
>>     }
>>
>>     /**
>>      * Encodes {@code value} as 8 bytes, most significant byte first.
>>      */
>>     private static byte[] fastToBytes( long value ) {
>>         byte[] array = new byte[8];
>>         for (int i = 0; i < 8; i++) {
>>             array[7 - i] = (byte) (value >>> (i * 8));
>>         }
>>         return array;
>>     }
>>
>>     /**
>>      * Decodes a big-endian byte array produced by
>>      * {@link #fastToBytes(long)} back into a long.
>>      */
>>     private static long fastToLong( byte[] array ) {
>>         long value = 0;
>>         for (int i = 0; i < array.length; i++) {
>>             value <<= 8;
>>             value ^= (long) array[i] & 0xFF;
>>         }
>>         return value;
>>     }
>>
>>     /**
>>      * Builds the prefix {@code key|value|} shared by all entries that were
>>      * stored under the given key and value.
>>      */
>>     private static byte[] lookupKey( String key, Object value ) {
>>         return String.valueOf( key + "|" + value + "|" ).getBytes();
>>     }
>>
>>     /**
>>      * Builds the full entry key {@code key|value|id}.
>>      */
>>     private static byte[] key( long id, String key, Object value ) {
>>         return String.valueOf( key + "|" + value + "|" + id ).getBytes();
>>     }
>>
>>     /**
>>      * Performs {@code num} prefix lookups and prints the elapsed time
>>      * together with the number of entries that were actually returned.
>>      *
>>      * @param num   number of lookups to perform
>>      * @param start first index; the looked-up value is derived from it
>>      *              exactly as in {@link #insert(int, int)}
>>      */
>>     private static void lookup( int num, int start ) throws Exception {
>>         long t = System.currentTimeMillis();
>>         long hits = 0;
>>         for (int i = start; i < (start + num); i++) {
>>             // Must use the same "value" + (i % 10000) scheme as insert();
>>             // looking up "value" + i would never match a stored key.
>>             Iterator<Entry<byte[], byte[]>> entries = db.prefixLookup( 0,
>>                     lookupKey( "key", "value" + i % 10000 ), null ).get();
>>             while (entries.hasNext()) {
>>                 Entry<byte[], byte[]> entry = entries.next();
>>                 fastToLong( entry.getValue() );
>>                 hits++;
>>             }
>>         }
>>         // The hit count makes a lookup that matches nothing visible.
>>         System.out.println( num + " lookups:"
>>                 + (System.currentTimeMillis() - t)
>>                 + " (" + hits + " hits)" );
>>     }
>>
>>     /**
>>      * Inserts {@code num} entries with ids {@code start .. start + num - 1}
>>      * as one insert group and prints the elapsed time.
>>      *
>>      * @param num   number of entries to insert
>>      * @param start id of the first entry
>>      */
>>     private static void insert( int num, int start ) throws Exception {
>>         long t = System.currentTimeMillis();
>>         BabuDBInsertGroup group = db.createInsertGroup();
>>
>>         for (int i = start; i < (num + start); i++) {
>>             long id = i;
>>             group.addInsert( 0, key( id, "key", "value" + i % 10000 ),
>>                     fastToBytes( id ) );
>>         }
>>         db.insert( group, null ).get();
>>         System.out.println( "insert time (" + num + "):"
>>                 + (System.currentTimeMillis() - t) );
>>     }
>>
>>     /**
>>      * Deletes a file, or a directory with everything below it. Deletion is
>>      * best effort: failures of {@link File#delete()} are ignored.
>>      *
>>      * @param file file or directory to remove; a missing path is a no-op
>>      */
>>     public static void deleteFileOrDirectory( File file ) {
>>         if (!file.exists()) {
>>             return;
>>         }
>>
>>         if (file.isDirectory()) {
>>             // listFiles() returns null when the directory cannot be read.
>>             File[] children = file.listFiles();
>>             if (children != null) {
>>                 for (File child : children) {
>>                     deleteFileOrDirectory( child );
>>                 }
>>             }
>>         }
>>         file.delete();
>>     }
>>
>>     /**
>>      * Returns a random long. Not called anywhere in this class.
>>      */
>>     private static long randomId() {
>>         return RandomUtils.nextLong();
>>     }
>> }
>>
>>
>> TestLucene
>>
>>
>> package org.ogrm.test;
>>
>> import java.io.File;
>> import java.io.IOException;
>>
>> import org.apache.lucene.analysis.KeywordAnalyzer;
>> import org.apache.lucene.document.Document;
>> import org.apache.lucene.document.Field;
>> import org.apache.lucene.document.Field.Index;
>> import org.apache.lucene.document.Field.Store;
>> import org.apache.lucene.index.IndexReader;
>> import org.apache.lucene.index.IndexWriter;
>> import org.apache.lucene.index.Term;
>> import org.apache.lucene.index.IndexWriter.MaxFieldLength;
>> import org.apache.lucene.search.IndexSearcher;
>> import org.apache.lucene.search.Query;
>> import org.apache.lucene.search.ScoreDoc;
>> import org.apache.lucene.search.TermQuery;
>> import org.apache.lucene.search.TopDocs;
>> import org.apache.lucene.store.Directory;
>> import org.apache.lucene.store.FSDirectory;
>>
>> public class TestLucene {
>> private static IndexWriter writer;
>> private static IndexSearcher searcher;
>>
>> public static void main( String[] args ) throws Exception {
>> File path = new File( "lcn" );
>> deleteFileOrDirectory( path );
>> Directory dir = FSDirectory.open( path );
>> writer = new IndexWriter( dir, new KeywordAnalyzer(),
>> MaxFieldLength.UNLIMITED );
>> writer.setMaxBufferedDocs( 100000 );
>>
>> IndexReader reader = writer.getReader();
>> searcher = new IndexSearcher( reader );
>>
>> int init = 1000000;
>> int num = 1000;
>> int base = 0;
>> int iterations = 1000;
>> insert( init, base );
>> base = init;
>> for (int i = 0; i < iterations; i++) {
>> lookup( num, base );
>> insert( num, base );
>> base = base + num;
>> }
>> }
>>
>> public static void deleteFileOrDirectory( File file ) {
>> if (!file.exists()) {
>> return;
>> }
>>
>> if (file.isDirectory()) {
>> for (File child : file.listFiles()) {
>> deleteFileOrDirectory( child );
>> }
>> file.delete();
>> } else {
>> file.delete();
>> }
>> }
>>
>> private static void insert( int num, int base ) throws Exception {
>> long t = System.currentTimeMillis();
>> for (int i = num; i < (num + base); i++) {
>> Document doc = new Document();
>> doc.add( new Field( "_id_", fastToBytes( i ),
>> Store.YES ) );
>> doc.add( new Field( "key", "value" + i % 10000,
>> Store.NO,
>> Index.NOT_ANALYZED ) );
>> writer.addDocument( doc );
>> }
>> writer.commit();
>> System.out.println( num + " inserts in " +
>> (System.currentTimeMillis() - t) );
>> }
>>
>> private static void lookup( int num, int base ) throws Exception {
>> long t = System.currentTimeMillis();
>> for (int i = 0; i < 100; i++) {
>> Query query = new TermQuery( new Term( "key",
>> "value" + (i + 2500000) ) );
>> TopDocs docs = searcher.search( query, 100 );
>> for (ScoreDoc scoreDoc : docs.scoreDocs) {
>> Document doc = searcher.doc( scoreDoc.doc );
>> fastToLong( doc.getBinaryValue( "_id_" ) );
>> }
>> }
>> System.out.println( num + " get " +
>> (System.currentTimeMillis() - t) );
>> }
>>
>> private static byte[] fastToBytes( long value ) throws IOException {
>> byte[] array = new byte[8];
>> for (int i = 0; i < 8; i++) {
>> array[7 - i] = (byte) (value >>> (i * 8));
>> }
>> return array;
>> }
>>
>> private static long fastToLong( byte[] array ) throws IOException {
>> long value = 0;
>> for (int i = 0; i < array.length; i++) {
>> value <<= 8;
>> value ^= (long) array[i] & 0xFF;
>> }
>> return value;
>> }
>> }
>>
>>
>>
>> -atle
>> _______________________________________________
>> Neo4j mailing list
>> User at lists.neo4j.org
>> https://lists.neo4j.org/mailman/listinfo/user
>>
>
>
>
> --
> Mattias Persson, [mattias at neotechnology.com]
> Hacker, Neo Technology
> www.neotechnology.com
> _______________________________________________
> Neo4j mailing list
> User at lists.neo4j.org
> https://lists.neo4j.org/mailman/listinfo/user
>
More information about the User
mailing list