-
Notifications
You must be signed in to change notification settings - Fork 280
feat: add support for named vectorizers to ai.vectorizer_errors #740
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
-- rename the ai.vectorizer_errors table to ai._vectorizer_errors | ||
-- this frees the ai.vectorizer_errors name so the view below can take it | ||
alter table ai.vectorizer_errors rename to _vectorizer_errors; | ||
|
||
-- rename the existing index on the renamed table (it still carries the old vectorizer_errors name) | ||
-- so it follows the same naming convention (adds the _ prefix) | ||
-- this is not strictly necessary, but it is good practice to keep the naming consistent | ||
alter index ai.vectorizer_errors_id_recorded_idx rename to _vectorizer_errors_id_recorded_idx; | ||
|
||
-- create a view under the old table name that returns every column of | ||
-- ai._vectorizer_errors plus the name of the matching vectorizer | ||
-- the left join keeps error rows that have no matching ai.vectorizer row; name is null for those | ||
create or replace view ai.vectorizer_errors as | ||
select | ||
ve.*, | ||
v.name | ||
from | ||
ai._vectorizer_errors ve | ||
left join ai.vectorizer v on ve.id = v.id; | ||
|
||
|
||
-- grant privileges on new ai.vectorizer_errors view | ||
do language plpgsql $block$ | ||
declare | ||
    to_user text; | ||
    priv_type text; | ||
    with_grant text; | ||
    rec record; | ||
begin | ||
    -- copy every privilege recorded on the renamed table ai._vectorizer_errors | ||
    -- (formerly ai.vectorizer_errors) onto the new ai.vectorizer_errors view, | ||
    -- so that grantees of the old table can keep using the old name | ||
    for rec in | ||
        select distinct grantee as username, privilege_type, is_grantable | ||
        from information_schema.role_table_grants | ||
        where table_schema = 'ai' | ||
        and table_name = '_vectorizer_errors' | ||
    loop | ||
        -- the information schema reports grants made to everyone with the grantee 'PUBLIC' | ||
        -- PUBLIC is a keyword, not a role name: quoting it with %I yields "PUBLIC" | ||
        -- and the grant fails with: role "PUBLIC" does not exist | ||
        if rec.username = 'PUBLIC' then | ||
            to_user := 'PUBLIC'; | ||
        else | ||
            -- real role names are quoted as identifiers before being spliced into the statement | ||
            to_user := format('%I', rec.username); | ||
        end if; | ||
        priv_type := rec.privilege_type; | ||
        -- carry over the grant option only when the original grant had it | ||
        with_grant := ''; | ||
        if rec.is_grantable then | ||
            with_grant := ' WITH GRANT OPTION'; | ||
        end if; | ||
        -- to_user is already quoted (or is the bare PUBLIC keyword), hence %s rather than %I | ||
        execute format('GRANT %s ON ai.vectorizer_errors TO %s %s', priv_type, to_user, with_grant); | ||
    end loop; | ||
end | ||
$block$; |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,11 +15,13 @@ def __init__( | |
has_worker_tracking_table: bool, | ||
has_loading_retries: bool, | ||
has_reveal_secret_function: bool, | ||
has_vectorizer_errors_view: bool, | ||
) -> None: | ||
self.has_disabled_column = has_disabled_column | ||
self.has_worker_tracking_table = has_worker_tracking_table | ||
self.has_loading_retries = has_loading_retries | ||
self.has_reveal_secret_function = has_reveal_secret_function | ||
self.has_vectorizer_errors_view = has_vectorizer_errors_view | ||
|
||
@classmethod | ||
def from_db(cls: type[Self], cur: psycopg.Cursor) -> Self: | ||
|
@@ -62,20 +64,31 @@ def from_db(cls: type[Self], cur: psycopg.Cursor) -> Self: | |
cur.execute(query) | ||
has_reveal_secret_function = cur.fetchone() is not None | ||
|
||
# Newer versions of pgai lib have the ai.vectorizer_errors view. | ||
# The table has been renamed to ai._vectorizer_errors | ||
query = """ | ||
SELECT table_name | ||
FROM information_schema.views | ||
WHERE table_schema = 'ai' AND table_name = 'vectorizer_errors'; | ||
""" | ||
cur.execute(query) | ||
has_vectorizer_errors_view = cur.fetchone() is not None | ||
Comment on lines
+67
to
+75
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would've probably checked if the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any benefit from that approach? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's just one step closer to what the vectorizer actually needs. |
||
|
||
return cls( | ||
has_disabled_column, | ||
has_worker_tracking_table, | ||
has_loading_retries, | ||
has_reveal_secret_function, | ||
has_vectorizer_errors_view, | ||
) | ||
|
||
@classmethod | ||
def for_testing_latest_version(cls: type[Self]) -> Self: | ||
return cls(True, True, True, True) | ||
return cls(True, True, True, True, True) | ||
|
||
@classmethod | ||
def for_testing_no_features(cls: type[Self]) -> Self: | ||
return cls(False, False, False, False) | ||
return cls(False, False, False, False, False) | ||
|
||
@cached_property | ||
def disable_vectorizers(self) -> bool: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Those are the changes introduced compared to #683 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
interactions: | ||
- request: | ||
body: '{"input": ["post_1", "post_2", "post_3"], "model": "intentionally-bad-embedding-model", | ||
"dimensions": 1536, "encoding_format": "float"}' | ||
headers: | ||
accept: | ||
- application/json | ||
accept-encoding: | ||
- gzip, deflate, br, zstd | ||
connection: | ||
- keep-alive | ||
content-length: | ||
- '135' | ||
content-type: | ||
- application/json | ||
host: | ||
- api.openai.com | ||
user-agent: | ||
- AsyncOpenAI/Python 1.70.0 | ||
x-stainless-arch: | ||
- arm64 | ||
x-stainless-async: | ||
- async:asyncio | ||
x-stainless-lang: | ||
- python | ||
x-stainless-os: | ||
- MacOS | ||
x-stainless-package-version: | ||
- 1.70.0 | ||
x-stainless-raw-response: | ||
- stream | ||
x-stainless-read-timeout: | ||
- '600' | ||
x-stainless-retry-count: | ||
- '0' | ||
x-stainless-runtime: | ||
- CPython | ||
x-stainless-runtime-version: | ||
- 3.10.15 | ||
method: POST | ||
uri: https://api.openai.com/v1/embeddings | ||
response: | ||
body: | ||
string: !!binary | | ||
IbgDACCWTuUm6PQ4rrgJLvdamfPpEQSGbUltWUFbEFGGYZjAxe42Bq34IaX8BF8PAwDy8zzMJFA4 | ||
pmeHnyRA/+TRDc63ULlffb/moddte3GjHfed8c7lPnKYK4gh6/kTw4xr2KBU4Yp/YR2Q1x81d2O9 | ||
xrbc27GnnL2FuUiKHdxS2O8k0G9tuwPs4JDNkP2wyqj8TKj62McMAw== | ||
headers: | ||
CF-RAY: | ||
- 93fa9285b965589a-BCN | ||
Connection: | ||
- keep-alive | ||
Content-Encoding: | ||
- br | ||
Content-Type: | ||
- application/json; charset=utf-8 | ||
Date: | ||
- Wed, 14 May 2025 12:59:22 GMT | ||
Server: | ||
- cloudflare | ||
Transfer-Encoding: | ||
- chunked | ||
X-Content-Type-Options: | ||
- nosniff | ||
alt-svc: | ||
- h3=":443"; ma=86400 | ||
cf-cache-status: | ||
- DYNAMIC | ||
strict-transport-security: | ||
- max-age=31536000; includeSubDomains; preload | ||
vary: | ||
- Origin | ||
x-request-id: | ||
- req_c8879e03af8f9ecd9ac5178dbc89604d | ||
status: | ||
code: 404 | ||
message: Not Found | ||
version: 1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Those are the changes introduced compared to #683