|
| 1 | +from unstructured.ingest.connector.chroma import ( |
| 2 | + ChromaAccessConfig, |
| 3 | + ChromaWriteConfig, |
| 4 | + SimpleChromaConfig, |
| 5 | +) |
| 6 | +from unstructured.ingest.connector.local import SimpleLocalConfig |
1 | 7 | from unstructured.ingest.interfaces import (
|
2 | 8 | ChunkingConfig,
|
3 | 9 | EmbeddingConfig,
|
|
6 | 12 | ReadConfig,
|
7 | 13 | )
|
8 | 14 | from unstructured.ingest.runner import LocalRunner
|
| 15 | +from unstructured.ingest.runner.writers.base_writer import Writer |
| 16 | +from unstructured.ingest.runner.writers.chroma import ( |
| 17 | + ChromaWriter, |
| 18 | +) |
| 19 | + |
| 20 | + |
| 21 | +def get_writer() -> Writer: |
| 22 | + return ChromaWriter( |
| 23 | + connector_config=SimpleChromaConfig( |
| 24 | + access_config=ChromaAccessConfig(), |
| 25 | + host="localhost", |
| 26 | + port=8000, |
| 27 | + collection_name="elements", |
| 28 | + tenant="default_tenant", |
| 29 | + database="default_database", |
| 30 | + ), |
| 31 | + write_config=ChromaWriteConfig(), |
| 32 | + ) |
| 33 | + |
9 | 34 |
|
10 | 35 | if __name__ == "__main__":
|
| 36 | + writer = get_writer() |
11 | 37 | runner = LocalRunner(
|
12 | 38 | processor_config=ProcessorConfig(
|
13 | 39 | verbose=True,
|
14 |
| - output_dir="local-output-to-pinecone", |
| 40 | + output_dir="local-output-to-chroma", |
15 | 41 | num_processes=2,
|
16 | 42 | ),
|
| 43 | + connector_config=SimpleLocalConfig( |
| 44 | + input_path="example-docs/book-war-and-peace-1225p.txt", |
| 45 | + ), |
17 | 46 | read_config=ReadConfig(),
|
18 | 47 | partition_config=PartitionConfig(),
|
19 | 48 | chunking_config=ChunkingConfig(chunk_elements=True),
|
20 | 49 | embedding_config=EmbeddingConfig(
|
21 | 50 | provider="langchain-huggingface",
|
22 | 51 | ),
|
23 |
| - writer_type="chroma", |
24 |
| - writer_kwargs={ |
25 |
| - "host": "localhost", |
26 |
| - "port": 8000, |
27 |
| - "collection_name": "test-collection", |
28 |
| - "batch_size": 80, |
29 |
| - }, |
30 |
| - ) |
31 |
| - runner.run( |
32 |
| - input_path="example-docs/fake-memo.pdf", |
| 52 | + writer=writer, |
| 53 | + writer_kwargs={}, |
33 | 54 | )
|
| 55 | + runner.run() |
0 commit comments