[Neo4j] Index Performance(Was: Event framework has landed)

Atle Prange atle.prange at gmail.com
Thu Jun 24 11:29:25 CEST 2010


I guess i have to brush the dust of my tree knowledge then...

-atle

On Thu, 2010-06-24 at 09:43 +0200, Mattias Persson wrote:
> 2010/6/23 Atle Prange <atle.prange at gmail.com>
> 
> > Hm, i'll have to fix that...
> >
> > Any thoughts on a Trie implementation? Would it be able to compete?
> >
> > I have no idea on performance or what would be the best approach. I though
> your alphabet-relationship-types approach sounded quite interesting. Or as a
> b-tree or some other ??-tree.
> >
> > atle
> >
> >
> >
> >
> > On Wed, Jun 23, 2010 at 11:04 AM, Mattias Persson
> > <mattias at neotechnology.com> wrote:
> > > I think the lucene test is flawed since it never returns any results in
> > > lookup method. That's why it's so fast :)
> > >
> > > 2010/6/22 Atle Prange <atle.prange at gmail.com>
> > >
> > >> Started a new thread since the old got a bit long, if you want to
> > >> catch up read the thread "The event framework has landed".
> > >>
> > >> Okay, i changed the tests to reflect a bit more realistic usage.
> > >>
> > >> The tests first inserts 1M entries to create a base of data. After
> > >> that it makes reads and writes 1000 entries a thousand times.
> > >>
> > >> BabuDB:
> > >> First million: 4s
> > >> 1000 inserts, 4ms
> > >> 1000 lookups: 30ms
> > >>
> > >> Lucene:
> > >> First million entries took 1 ms. This shows the async behavior of
> > Lucene.
> > >> 1000 inserts: about 4 seconds (!)
> > >> 1000 lookups: under 1 ms.
> > >>
> > >> (All numbers extremely approximated, and the numbers can only be seen
> > >> as relative performance indicators)
> > >>
> > >>
> > >> This is what i excpected. Lucene is optimized towards collecting large
> > >> amount of data batchwise, and then handle many searches. (Correct me
> > >> if i am wrong)
> > >> BabuDB "just writes" data and "just reads" them later on.
> > >>
> > >> The test can of course be flawed.
> > >>
> > >> BabuDB test:
> > >>
> > >>
> > >>
> > >> package org.ogrm.test;
> > >>
> > >> import java.io.File;
> > >> import java.io.IOException;
> > >> import java.util.Iterator;
> > >> import java.util.Map.Entry;
> > >>
> > >> import org.apache.commons.lang.math.RandomUtils;
> > >> import org.xtreemfs.babudb.BabuDB;
> > >> import org.xtreemfs.babudb.BabuDBException;
> > >> import org.xtreemfs.babudb.BabuDBFactory;
> > >> import org.xtreemfs.babudb.config.BabuDBConfig;
> > >> import org.xtreemfs.babudb.log.DiskLogger.SyncMode;
> > >> import org.xtreemfs.babudb.lsmdb.BabuDBInsertGroup;
> > >> import org.xtreemfs.babudb.lsmdb.Database;
> > >>
> > >> public class TestBabuDb {
> > >>
> > >>        private static Database db;
> > >>
> > >>        public static void main( String[] args ) throws Exception {
> > >>                deleteFileOrDirectory( new File( "babudb" ) );
> > >>                BabuDB babuDb = BabuDBFactory.createBabuDB( new
> > >> BabuDBConfig(
> > >> "babudb/db", "babudb/log", 1, 1024 * 1024 * 20,
> > >>                                10, SyncMode.ASYNC, 0, 0, false, 512,
> > 1024 *
> > >> 1024 * 100 ) );
> > >>                db = babuDb.getDatabaseManager().createDatabase( "test",
> > 1
> > >> );
> > >>                int init = 1000000;
> > >>                int num = 1000;
> > >>                int base = 0;
> > >>                int iterations = 1000;
> > >>                insert( init, base );
> > >>                base = init;
> > >>                for (int i = 0; i < iterations; i++) {
> > >>                        lookup( num, base );
> > >>                        insert( num, base );
> > >>                        base = base + num;
> > >>                }
> > >>
> > >>                db.shutdown();
> > >>                babuDb.shutdown();
> > >>        }
> > >>
> > >>        private static byte[] fastToBytes( long value ) throws
> > IOException {
> > >>                byte[] array = new byte[8];
> > >>                for (int i = 0; i < 8; i++) {
> > >>                        array[7 - i] = (byte) (value >>> (i * 8));
> > >>                }
> > >>                return array;
> > >>        }
> > >>
> > >>        private static long fastToLong( byte[] array ) throws IOException
> > {
> > >>                long value = 0;
> > >>                for (int i = 0; i < array.length; i++) {
> > >>                        value <<= 8;
> > >>                        value ^= (long) array[i] & 0xFF;
> > >>                }
> > >>                return value;
> > >>        }
> > >>
> > >>        private static byte[] lookupKey( String key, Object value ) {
> > >>                return String.valueOf( key + "|" + value + "|"
> > ).getBytes();
> > >>        }
> > >>
> > >>        private static byte[] key( long id, String key, Object value ) {
> > >>                return String.valueOf( key + "|" + value + "|" + id
> > >> ).getBytes();
> > >>        }
> > >>
> > >>        private static void lookup( int num, int start ) throws Exception
> > {
> > >>                long t = System.currentTimeMillis();
> > >>                for (int i = start; i < (start + num); i++) {
> > >>                        Iterator<Entry<byte[], byte[]>> entries =
> > >> db.prefixLookup( 0,
> > >> lookupKey( "key", "value" + i ), null ).get();
> > >>                        while (entries.hasNext()) {
> > >>                                Entry<byte[], byte[]> entry =
> > >> entries.next();
> > >>                                fastToLong( entry.getValue() );
> > >>                        }
> > >>                }
> > >>                System.out.println( num + " lookups:" +
> > >> (System.currentTimeMillis() - t) );
> > >>        }
> > >>
> > >>        private static void insert( int num, int start ) throws Exception
> > {
> > >>                long t = System.currentTimeMillis();
> > >>                BabuDBInsertGroup group = db.createInsertGroup();
> > >>
> > >>                for (int i = start; i < (num + start); i++) {
> > >>                        long id = i;
> > >>                        group.addInsert( 0, key( id, "key", "value" + i %
> > >> 10000 ),
> > >> fastToBytes( id ) );
> > >>                }
> > >>                db.insert( group, null ).get();
> > >>                System.out.println( "insert time (" + num + "):" +
> > >> (System.currentTimeMillis() - t) );
> > >>        }
> > >>
> > >>        public static void deleteFileOrDirectory( File file ) {
> > >>                if (!file.exists()) {
> > >>                        return;
> > >>                }
> > >>
> > >>                if (file.isDirectory()) {
> > >>                        for (File child : file.listFiles()) {
> > >>                                deleteFileOrDirectory( child );
> > >>                        }
> > >>                        file.delete();
> > >>                } else {
> > >>                        file.delete();
> > >>                }
> > >>        }
> > >>
> > >>        private static long randomId() {
> > >>                return RandomUtils.nextLong();
> > >>        }
> > >> }
> > >>
> > >>
> > >> TestLucene
> > >>
> > >>
> > >> package org.ogrm.test;
> > >>
> > >> import java.io.File;
> > >> import java.io.IOException;
> > >>
> > >> import org.apache.lucene.analysis.KeywordAnalyzer;
> > >> import org.apache.lucene.document.Document;
> > >> import org.apache.lucene.document.Field;
> > >> import org.apache.lucene.document.Field.Index;
> > >> import org.apache.lucene.document.Field.Store;
> > >> import org.apache.lucene.index.IndexReader;
> > >> import org.apache.lucene.index.IndexWriter;
> > >> import org.apache.lucene.index.Term;
> > >> import org.apache.lucene.index.IndexWriter.MaxFieldLength;
> > >> import org.apache.lucene.search.IndexSearcher;
> > >> import org.apache.lucene.search.Query;
> > >> import org.apache.lucene.search.ScoreDoc;
> > >> import org.apache.lucene.search.TermQuery;
> > >> import org.apache.lucene.search.TopDocs;
> > >> import org.apache.lucene.store.Directory;
> > >> import org.apache.lucene.store.FSDirectory;
> > >>
> > >> public class TestLucene {
> > >>        private static IndexWriter writer;
> > >>        private static IndexSearcher searcher;
> > >>
> > >>        public static void main( String[] args ) throws Exception {
> > >>                File path = new File( "lcn" );
> > >>                deleteFileOrDirectory( path );
> > >>                Directory dir = FSDirectory.open( path );
> > >>                writer = new IndexWriter( dir, new KeywordAnalyzer(),
> > >> MaxFieldLength.UNLIMITED );
> > >>                writer.setMaxBufferedDocs( 100000 );
> > >>
> > >>                IndexReader reader = writer.getReader();
> > >>                searcher = new IndexSearcher( reader );
> > >>
> > >>                int init = 1000000;
> > >>                int num = 1000;
> > >>                int base = 0;
> > >>                int iterations = 1000;
> > >>                insert( init, base );
> > >>                base = init;
> > >>                for (int i = 0; i < iterations; i++) {
> > >>                        lookup( num, base );
> > >>                        insert( num, base );
> > >>                        base = base + num;
> > >>                }
> > >>        }
> > >>
> > >>        public static void deleteFileOrDirectory( File file ) {
> > >>                if (!file.exists()) {
> > >>                        return;
> > >>                }
> > >>
> > >>                if (file.isDirectory()) {
> > >>                        for (File child : file.listFiles()) {
> > >>                                deleteFileOrDirectory( child );
> > >>                        }
> > >>                        file.delete();
> > >>                } else {
> > >>                        file.delete();
> > >>                }
> > >>        }
> > >>
> > >>        private static void insert( int num, int base ) throws Exception
> > {
> > >>                long t = System.currentTimeMillis();
> > >>                for (int i = num; i < (num + base); i++) {
> > >>                        Document doc = new Document();
> > >>                        doc.add( new Field( "_id_", fastToBytes( i ),
> > >> Store.YES ) );
> > >>                        doc.add( new Field( "key", "value" + i % 10000,
> > >> Store.NO,
> > >> Index.NOT_ANALYZED ) );
> > >>                        writer.addDocument( doc );
> > >>                }
> > >>                writer.commit();
> > >>                System.out.println( num + " inserts in " +
> > >> (System.currentTimeMillis() - t) );
> > >>        }
> > >>
> > >>        private static void lookup( int num, int base ) throws Exception
> > {
> > >>                long t = System.currentTimeMillis();
> > >>                for (int i = 0; i < 100; i++) {
> > >>                        Query query = new TermQuery( new Term( "key",
> > >> "value" + (i + 2500000) ) );
> > >>                        TopDocs docs = searcher.search( query, 100 );
> > >>                        for (ScoreDoc scoreDoc : docs.scoreDocs) {
> > >>                                Document doc = searcher.doc( scoreDoc.doc
> > );
> > >>                                fastToLong( doc.getBinaryValue( "_id_" )
> > );
> > >>                        }
> > >>                }
> > >>                System.out.println( num + " get " +
> > >> (System.currentTimeMillis() - t) );
> > >>        }
> > >>
> > >>        private static byte[] fastToBytes( long value ) throws
> > IOException {
> > >>                byte[] array = new byte[8];
> > >>                for (int i = 0; i < 8; i++) {
> > >>                        array[7 - i] = (byte) (value >>> (i * 8));
> > >>                }
> > >>                return array;
> > >>        }
> > >>
> > >>        private static long fastToLong( byte[] array ) throws IOException
> > {
> > >>                long value = 0;
> > >>                for (int i = 0; i < array.length; i++) {
> > >>                        value <<= 8;
> > >>                        value ^= (long) array[i] & 0xFF;
> > >>                }
> > >>                return value;
> > >>        }
> > >> }
> > >>
> > >>
> > >>
> > >> -atle
> > >> _______________________________________________
> > >> Neo4j mailing list
> > >> User at lists.neo4j.org
> > >> https://lists.neo4j.org/mailman/listinfo/user
> > >>
> > >
> > >
> > >
> > > --
> > > Mattias Persson, [mattias at neotechnology.com]
> > > Hacker, Neo Technology
> > > www.neotechnology.com
> > > _______________________________________________
> > > Neo4j mailing list
> > > User at lists.neo4j.org
> > > https://lists.neo4j.org/mailman/listinfo/user
> > >
> > _______________________________________________
> > Neo4j mailing list
> > User at lists.neo4j.org
> > https://lists.neo4j.org/mailman/listinfo/user
> >
> 
> 
> 




More information about the User mailing list