@@ -3,17 +3,16 @@ name: Sync vector DB
3
3
4
4
# Trigger: the workflow runs only when started manually (workflow_dispatch).
# The lines prefixed with "-" remove the former `enable_artifact_download` input.
on :
7
5
workflow_dispatch :
6
- inputs :
7
- enable_artifact_download :
8
- description : ' Enable artifact download step'
9
- type : boolean
10
- required : false
11
- default : true
6
+
12
7
jobs :
13
- # This workflow contains a single job called "greet"
14
8
sync_db :
15
9
# The type of runner that the job will run on
16
10
runs-on : ubuntu-latest
11
+ permissions :
12
+ contents : read
13
+ id-token : write
14
+ env :
15
+ AWS_REGION : us-east-1
17
16
18
17
# Steps represent a sequence of tasks that will be executed as part of the job
19
18
steps :
@@ -31,32 +30,34 @@ jobs:
31
30
git lfs install
32
31
git lfs pull
33
32
34
- - name : Download json data
35
- id : download-json-data
36
- uses : dawidd6/action-download-artifact@20319c5641d495c8a52e688b7dc5fada6c3a9fbc # v8
33
# Configure AWS credentials for S3: the action is pinned to a commit SHA and
# assumes the role named by the AWS_ROLE_INSIGHT_DATA_IMPORT secret, in the
# region taken from the job-level AWS_REGION env var.
# NOTE(review): presumably authenticates via GitHub OIDC, given the job-level
# `id-token: write` permission — confirm.
# The "-" lines below are the `with:` inputs of the removed artifact-download step.
+ - name : Configure AWS Credentials for S3
34
+ uses : aws-actions/configure-aws-credentials@49f33fe638c0cba4fb16037a27915a7ab7740259
37
35
with :
38
- repo : stacklok/codegate-data
39
- workflow : " .github/workflows/generate-artifact.yml"
40
- workflow_conclusion : success
41
- name : jsonl-files
42
- path : /tmp/
43
- name_is_regexp : true
44
- skip_unpack : false
45
- if_no_artifact_found : ignore
36
+ role-to-assume : ${{ secrets.AWS_ROLE_INSIGHT_DATA_IMPORT }}
37
+ aws-region : ${{ env.AWS_REGION }}
46
38
47
- - name : Download artifact
48
- if : ${{ github.event.inputs.enable_artifact_download == 'true' }}
49
- id : download-artifact
50
- uses : dawidd6/action-download-artifact@20319c5641d495c8a52e688b7dc5fada6c3a9fbc # v8
51
- with :
52
- github_token : ${{ github.token }}
53
- workflow : " .github/workflows/import_packages.yml"
54
- workflow_conclusion : success
55
- name : sqlite_data
56
- path : /tmp/
57
- name_is_regexp : true
58
- skip_unpack : false
59
- if_no_artifact_found : ignore
39
# Fetch the current package datasets from S3. manifest.json maps each dataset
# (malicious / deprecated / archived) to its latest object key; each object is
# then saved under a fixed file name in /tmp/jsonl-files.
- name: Download JSONL files from S3
  run: |
    # Fail fast on any error, unset variable, or failed pipeline stage.
    set -euo pipefail

    # Single source of truth for the bucket name.
    BUCKET="codegate-data-prod"

    echo "Downloading manifest.json from S3..."
    aws s3 cp "s3://${BUCKET}/manifest.json" ./manifest.json --region "$AWS_REGION"
    echo "Manifest content:"
    cat manifest.json

    echo "Parsing manifest..."
    # -e makes jq exit non-zero when a key is missing or null, so the step
    # stops here instead of later requesting "s3://<bucket>/null".
    MALICIOUS_KEY=$(jq -er '.latest.malicious_packages' manifest.json)
    DEPRECATED_KEY=$(jq -er '.latest.deprecated_packages' manifest.json)
    ARCHIVED_KEY=$(jq -er '.latest.archived_packages' manifest.json)

    echo "Malicious key: $MALICIOUS_KEY"
    echo "Deprecated key: $DEPRECATED_KEY"
    echo "Archived key: $ARCHIVED_KEY"

    mkdir -p /tmp/jsonl-files

    # Download and map the S3 files to fixed names in /tmp/jsonl-files.
    # Keys are quoted so spaces or glob characters cannot split the argument.
    aws s3 cp "s3://${BUCKET}/${MALICIOUS_KEY}" /tmp/jsonl-files/malicious.jsonl --region "$AWS_REGION"
    aws s3 cp "s3://${BUCKET}/${DEPRECATED_KEY}" /tmp/jsonl-files/deprecated.jsonl --region "$AWS_REGION"
    aws s3 cp "s3://${BUCKET}/${ARCHIVED_KEY}" /tmp/jsonl-files/archived.jsonl --region "$AWS_REGION"
60
61
61
62
- name : Install Poetry
62
63
run : |
0 commit comments