skip to Main Content

What JPA + Hibernate data type should I use to support the pgvector extension in a PostgreSQL database, so that I can store embeddings using a JPA entity?

CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3));
https://github.com/pgvector/pgvector

2

Answers


  1. You can use Vlad Mihalcea's Hibernate Types library to map the vector column to a List, so that it can be saved and queried with a JpaRepository.

    1. add dependency to pom.xml file:
        <dependency>
          <groupId>io.hypersistence</groupId>
          <artifactId>hypersistence-utils-hibernate-55</artifactId>
          <version>3.5.0</version>
        </dependency>
    
    1. create Item class:
    
        import com.fasterxml.jackson.annotation.JsonInclude;
        import io.hypersistence.utils.hibernate.type.json.JsonType;
        import lombok.Data;
        import lombok.NoArgsConstructor;
        import org.hibernate.annotations.Type;
        import org.hibernate.annotations.TypeDef;
        
        import javax.persistence.*;
        import java.util.List;
        
        
        /**
         * JPA entity mapped to the {@code items} table.
         *
         * <p>The class-level {@code @TypeDef} registers {@link JsonType} under the name
         * {@code "json"} so that fields can refer to it through {@code @Type}. Accessors and the
         * no-argument constructor come from Lombok ({@code @Data}, {@code @NoArgsConstructor}).
         */
        @Data
        @NoArgsConstructor
        @Entity
        @Table(name = "items")
        @JsonInclude(JsonInclude.Include.NON_NULL)
        @TypeDef(name = "json", typeClass = JsonType.class)
        public class Item {
          // Primary key; the value is generated with the IDENTITY strategy.
          @Id
          @GeneratedValue(strategy = GenerationType.IDENTITY)
          private Long id;
        
          // Embedding values, bound through the "json" type registered on the class while
          // the column itself is declared with the definition "vector".
          @Type(type = "json")
          @Column(columnDefinition = "vector")
          private List<Double> embedding;
        }
    
    
    1. Create a JpaRepository interface that supports save and find. You can write custom findNearestNeighbors methods with native SQL:
    
        import org.springframework.data.jpa.repository.JpaRepository;
        import org.springframework.data.jpa.repository.Query;
        import org.springframework.data.repository.query.Param;

        import java.util.List;
        
        public interface ItemRepository extends JpaRepository<Item, Long> {
    
        // find nearest neighbors by a vector, for example value= "[1,2,3]"
        // This also works, cast is equals to :: operator in postgresql
        //@Query(nativeQuery = true, value = "SELECT * FROM items ORDER BY embedding <-> cast(? as vector) LIMIT 5")
         @Query(nativeQuery = true, value = "SELECT * FROM items ORDER BY embedding <-> ? \:\:vector LIMIT 5")
         List<Item> findNearestNeighbors(String value);
        
         // find nearest neighbors by a record in the same table
         @Query(nativeQuery = true, value = "SELECT * FROM items WHERE id != :id ORDER BY embedding <-> (SELECT embedding FROM items WHERE id = :id) LIMIT 5")
          List<Item> findNearestNeighbors(Long id);
          
        }
    
    
    1. Test create, query and findNearestNeighbors:
    
      // Repository exercised by the tests below; supplied through @Autowired injection.
      @Autowired
      private ItemRepository itemRepository;
    
    
      /**
       * Builds an item whose embedding holds three random doubles and saves it through the
       * repository. The test is transactional with rollback switched off.
       */
      @Test
      @Rollback(false)
      @Transactional
      public void createItem() {
        final Random random = new Random();
        final List<Double> vector = new ArrayList<>();
        int remaining = 3;
        while (remaining-- > 0) {
          vector.add(random.nextDouble());
        }

        final Item entity = new Item();
        entity.setEmbedding(vector);
        itemRepository.save(entity);
      }
    
      /** Fetches every item through the repository and prints the resulting list. */
      @Test
      public void loadItems() {
        System.out.println(itemRepository.findAll());
      }
    
      /** Runs the vector-text nearest-neighbor query with a fixed sample vector and prints the matches. */
      @Test
      public void findNearestNeighbors() {
        final List<Item> nearest = itemRepository.findNearestNeighbors("[0.1, 0.2, 0.3]");
        System.out.println(nearest);
      }
    
    
    Login or Signup to reply.
  2. You can use org.hibernate.type.descriptor.java.SerializableTypeDescriptor,
    as in this example:

    /**
     * JPA entity mapped to the {@code items} table, with the embedding kept as a
     * {@code double[]} bound through Hibernate's {@code "serializable"} type.
     *
     * <p>NOTE(review): the column is declared as {@code bytea}, not pgvector's {@code vector}
     * type, so vector operators such as {@code <->} presumably cannot be applied to it in
     * SQL. Confirm this is acceptable before adopting this mapping.
     */
    @Entity
    @Table(name="items")
    public class Item {
        // Primary key; the value is generated with the IDENTITY strategy.
        @Id
        @GeneratedValue(strategy = GenerationType.IDENTITY)
        private Long id;
    
        // Embedding array, bound with the "serializable" type into a column defined as "bytea".
        @Type(type = "serializable")
        @Column(columnDefinition = "bytea")
        private double[] embedding;
    
        // getters and setters
    }
    

    @Type: the embedding field uses the serializable type, which is mapped to the SerializableTypeDescriptor class.
    @Column: the column definition for the embedding field, which is bytea.

    Then you can create embeddings using a JPA entity and store them in a PostgreSQL database. Any object that can be serialized to a byte array is acceptable!

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search