Backend and Chunking
For this project, we will be using Google Antigravity for development. Make sure you have it installed on your system.
Creating the NextJS Application
We will start by creating the NextJS application. Follow these steps to set it up:
mkdir second-brain && cd second-brain
npm create next@latest .
Yes, use the recommended defaults when prompted. Once the setup finishes, your package.json should look similar to this:
{
  "name": "second-brain",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "dev": "next dev --webpack",
    "build": "next build --webpack",
    "start": "next start --webpack",
    "lint": "eslint"
  },
  "dependencies": {
    "@ai-sdk/google": "^2.0.44",
    "@ai-sdk/react": "^2.0.105",
    "@chroma-core/default-embed": "^0.1.9",
    "@google/generative-ai": "^0.24.1",
    "@langchain/community": "^1.0.5",
    "@langchain/core": "^1.1.0",
    "@langchain/openai": "^1.1.3",
    "@langchain/textsplitters": "^1.0.1",
    "@types/react-syntax-highlighter": "^15.5.13",
    "@vercel/analytics": "^1.6.1",
    "ai": "^5.0.105",
    "chromadb": "^3.1.6",
    "class-variance-authority": "^0.7.1",
    "clsx": "^2.1.1",
    "dotenv": "^16.4.5",
    "framer-motion": "^12.23.26",
    "langchain": "^1.1.1",
    "lucide-react": "^0.562.0",
    "mammoth": "^1.11.0",
    "mongodb": "^7.0.0",
    "motion": "^12.23.26",
    "next": "16.0.10",
    "openai": "^4.62.1",
    "pdf-parse": "^1.1.1",
    "pg": "^8.16.3",
    "react": "19.2.1",
    "react-dom": "19.2.1",
    "react-markdown": "^10.1.0",
    "react-speech-recognition": "^4.0.1",
    "react-syntax-highlighter": "^16.1.0",
    "regenerator-runtime": "^0.14.1",
    "rehype-raw": "^7.0.0",
    "rehype-sanitize": "^6.0.0",
    "tailwind-merge": "^3.4.0"
  },
  "devDependencies": {
    "@prisma/client": "^7.2.0",
    "@tailwindcss/postcss": "^4",
    "@types/node": "^20",
    "@types/pdf-parse": "^1.1.5",
    "@types/react": "^19",
    "@types/react-dom": "^19",
    "@types/react-speech-recognition": "^3.9.6",
    "eslint": "^9",
    "eslint-config-next": "16.0.6",
    "prisma": "^7.2.0",
    "tailwindcss": "^4",
    "tw-animate-css": "^1.4.0",
    "typescript": "^5"
  }
}
⚙️ Setting up ChromaDB
Go to ChromaDB and follow these steps:
1. Create your first database

2. Get the API key, tenant ID, and database name

We will be using the default Chroma embeddings, so we need to install the @chroma-core/default-embed package:
npm install @chroma-core/default-embed
Copy them into a .env file in the root of your project:
CHROMA_API_KEY=YOUR_CHROMA_API_KEY
CHROMA_TENANT=YOUR_CHROMA_TENANT
CHROMA_DATABASE=YOUR_CHROMA_DATABASE
Initialising the chromaClient
Now that we have the ChromaDB secrets, we can set up the Chroma client. Create chromaClient.ts under the lib folder. Note that it also reads a GOOGLE_GENERATIVE_AI_API_KEY from .env for the Gemini client, so add that key as well.
import { CloudClient } from "chromadb";
import { GoogleGenerativeAI } from "@google/generative-ai";
import dotenv from "dotenv";

dotenv.config();

const apiKey = process.env.CHROMA_API_KEY!;
const tenant = process.env.CHROMA_TENANT!;
const database = process.env.CHROMA_DATABASE!;
const geminiApiKey = process.env.GOOGLE_GENERATIVE_AI_API_KEY!;

if (!apiKey || !tenant || !database || !geminiApiKey) {
  throw new Error(
    "CHROMA_API_KEY, CHROMA_TENANT, CHROMA_DATABASE, and GOOGLE_GENERATIVE_AI_API_KEY must be set"
  );
}

// Gemini client, used later for the LLM part of the app
export const genAI = new GoogleGenerativeAI(geminiApiKey);

// ChromaDB Cloud client
export const chroma = new CloudClient({
  apiKey,
  tenant,
  database,
});

// Returns the named collection, creating it on first use
export async function getOrCreateCollection(name: string) {
  return chroma.getOrCreateCollection({
    name,
  });
}
This is where we connect the project to ChromaDB Cloud and also initialise the GoogleGenerativeAI client that we will use for the LLM part later.
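Before wiring this into the rest of the app, it can help to verify the connection with a small throwaway script. The file name scripts/checkChroma.ts and the collection name secondbrain-test below are hypothetical, used only for this check; since @chroma-core/default-embed is installed, Chroma will pick up the default embedding function automatically once documents are added.
// scripts/checkChroma.ts — a minimal connectivity check, assuming the chromaClient.ts above
// and a filled-in .env (the script path and collection name are placeholders)
import { getOrCreateCollection } from "../src/lib/chromaClient";

async function main() {
  // Creating (or fetching) a collection proves the API key, tenant and database are valid
  const collection = await getOrCreateCollection("secondbrain-test");
  console.log("Connected. Collection:", collection.name, "items:", await collection.count());
}

main().catch(console.error);
Run it with a TypeScript runner such as npx tsx scripts/checkChroma.ts; a count of 0 on a fresh collection means the Cloud connection is working.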
Creating the Chunk and Ingest Function
After creating the chromaClient, the next step is to create chunkAndIngest.ts, which will be responsible for chunking the documents and ingesting them into ChromaDB. Create this file inside the same lib folder.
"use server";
import fs from "fs/promises";
import path from "path";
import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
import { getOrCreateCollection, genAI } from "./chromaClient";
export async function ingestTextIntoChroma(
collectionName: string,
filePath: string,
text: string,
metadata: Record<string, number> = {}
) {
const splitter = new RecursiveCharacterTextSplitter({
chunkSize: 1200,
chunkOverlap: 200,
});
const chunks = await splitter.splitText(text);
const collection = await getOrCreateCollection(collectionName);
}Let us understand the main libraries used here
RecursiveCharacterTextSplitter = Text splitters break large docs into smaller chunks that can be retrieved individually and still fit within the model's context window limit.
getOrCreateCollection = Creates a new collection or returns an existing one.
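If you want a feel for what the splitter produces before wiring it into Chroma, you can run it on a sample string in isolation. This is a hypothetical standalone snippet, not one of the project files:
import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";

const splitter = new RecursiveCharacterTextSplitter({
  chunkSize: 1200,   // maximum characters per chunk
  chunkOverlap: 200, // characters repeated between consecutive chunks to preserve context
});

async function demo() {
  // Roughly 5,000 characters of fake "document" text
  const sample = "Second Brain stores personal notes and retrieves them for the LLM. ".repeat(75);
  const chunks = await splitter.splitText(sample);
  console.log(chunks.length, "chunks");
  console.log("first chunk length:", chunks[0].length);
}

demo();
The exact chunk count depends on the separators in the text, but with the 1200/200 configuration a ~5,000-character document splits into a handful of overlapping chunks.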
Let us now create the ingest API, which will be called to chunk and ingest the data into ChromaDB. We will create a new API route under src/app/api/ingest/route.ts
import crypto from "crypto";
import { NextRequest } from "next/server";
import { ingestTextIntoChroma } from "@/lib/chunkAndIngest";
import { getOrCreateCollection } from "@/lib/chromaClient";

export const runtime = "nodejs";

const API_KEY = process.env.CHROMA_API_KEY!;

// Hash the file content so unchanged files can be skipped on re-ingestion
function hashContent(text: string) {
  return crypto.createHash("sha256").update(text).digest("hex");
}

export async function POST(req: NextRequest) {
  const auth = req.headers.get("authorization");
  if (auth !== `Bearer ${API_KEY}`) {
    return new Response("Unauthorized", { status: 401 });
  }

  const { filePath, content } = await req.json();
  if (!filePath || !content) {
    return new Response("Invalid payload", { status: 400 });
  }

  const collection = await getOrCreateCollection("secondbrain");
  const fileHash = hashContent(content);

  // 🔍 Check if file already ingested with same hash
  const existing = await collection.get({
    where: { filePath },
    include: ["metadatas"],
  });
  const existingHash = existing.metadatas?.[0]?.fileHash;

  if (existingHash === fileHash) {
    // console.log(`Skipping unchanged file: ${filePath}`);
    return Response.json({ status: "skipped" });
  }

  // ♻️ Delete old chunks
  await collection.delete({ where: { filePath } });

  // ➕ Ingest new content (the hash is stored as a string so the comparison above works)
  await ingestTextIntoChroma("secondbrain", filePath, content, { fileHash });

  return Response.json({ status: "ingested", filePath });
}
Testing it using Google Antigravity
Now that we have the function and API ready, let us use Google Antigravity to test it. Open the Agent Manager and prompt it with the following:
Given the following API route and the chunk and ingest logic, please create a knowledge folder containing a markdown file with some knowledge data, and test the API route.
After the agent completes the testing, you will see the entire document converted into chunks and displayed on the console.
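If you would rather exercise the route by hand, the request the agent ends up making looks roughly like this. This is a sketch: the dev server address, the testIngest.ts file name, and the knowledge/notes.md path are example values, not part of the project.
// testIngest.ts — hypothetical manual test for the ingest route
import "dotenv/config";

async function testIngest() {
  const res = await fetch("http://localhost:3000/api/ingest", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      // must carry the same CHROMA_API_KEY the route checks against
      Authorization: `Bearer ${process.env.CHROMA_API_KEY}`,
    },
    body: JSON.stringify({
      filePath: "knowledge/notes.md", // any stable identifier for the document
      content: "# My notes\n\nSome knowledge data that should be chunked and ingested.",
    }),
  });
  console.log(res.status, await res.json());
}

testIngest();
The first call should log { status: "ingested", filePath: "knowledge/notes.md" }, and a second call with the same content should log { status: "skipped" } thanks to the hash check.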
Next Steps
In the next section, we’ll:
Convert all the chunks into embeddings using ChromaDB
Store the embeddings in ChromaDB
If you want to know more about this, do check out our video guide:
