-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlicense-recommend-openai.sh
More file actions
69 lines (55 loc) · 2.34 KB
/
license-recommend-openai.sh
File metadata and controls
69 lines (55 loc) · 2.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/bin/bash
#
# For every repository listed in repos.csv, ask the OpenAI chat-completions
# API to recommend one license (MIT, MPL-2.0 or BUSL) and record the answer.
#
# Input : repos.csv  - first line is a header, then "name,url" rows.
# Output: license-repo-openai.csv with the columns
#         Repository,Repository_url,AI_Recommended_License,Reason
# Needs : curl, jq, and OPENAI_API_KEY exported in the environment.

CSV_INPUT="repos.csv"
CSV_OUTPUT="license-repo-openai.csv"
MODEL="gpt-3.5-turbo"
RATE_LIMIT_DELAY=20   # seconds to sleep after each processed row

# Strip carriage returns, surrounding whitespace and one pair of enclosing
# double quotes from $1. Done with parameter expansion rather than `xargs`,
# which aborts on unbalanced quotes/apostrophes and drops backslashes.
trim() {
  local s=$1
  s=${s//$'\r'/}
  s="${s#"${s%%[![:space:]]*}"}"
  s="${s%"${s##*[![:space:]]}"}"
  if [[ ${#s} -ge 2 && "$s" == \"*\" ]]; then
    s=${s:1:${#s}-2}
  fi
  printf '%s' "$s"
}

# Print $1 as a quoted CSV field, doubling any embedded double quotes.
csv_quote() {
  local s=$1
  s=${s//\"/\"\"}
  printf '"%s"' "$s"
}

# Print $1 as a CSV field, quoting it only when it contains a comma or quote.
csv_field() {
  local s=$1
  if [[ "$s" == *[,\"]* ]]; then
    csv_quote "$s"
  else
    printf '%s' "$s"
  fi
}

if [[ ! -f "$CSV_INPUT" ]]; then
  echo "Error: CSV file not found." >&2
  exit 1
fi

if [[ -z "${OPENAI_API_KEY:-}" ]]; then
  echo "Error: OPENAI_API_KEY is not set." >&2
  exit 1
fi

for tool in curl jq; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "Error: required tool '$tool' is not installed." >&2
    exit 1
  fi
done

echo "Repository,Repository_url,AI_Recommended_License,Reason" > "$CSV_OUTPUT"

# The CSV is read on fd 3 so that nothing inside the loop can consume rows
# from stdin. The `|| [[ -n ... ]]` keeps a last row lacking a final newline.
while IFS=',' read -r -u 3 repo_name repo_url || [[ -n "$repo_name" ]]; do
  repo_name=$(trim "$repo_name")
  repo_url=$(trim "$repo_url")

  if [[ -z "$repo_name" || -z "$repo_url" ]]; then
    echo "Skipping invalid row: $repo_name | $repo_url"
    continue
  fi

  prompt="You are an expert in open-source licensing.
Given the open-source repository \"$repo_name\" hosted at \"$repo_url\", recommend the most suitable license based on:
1. Community engagement — fostering contributions and adoption.
2. Commercial differentiation — protecting business interests.
Choose strictly ONE license: MIT, MPL-2.0, or BUSL.
Respond ONLY with the license name and a short one-sentence reason. Format it like this (no bullet points, no markdown):
LICENSE: <MIT|MPL-2.0|BUSL>
REASON: <brief reason>"

  # jq builds the request body so the prompt is JSON-escaped correctly.
  payload=$(jq -nc \
    --arg model "$MODEL" \
    --arg prompt "$prompt" \
    '{
      model: $model,
      messages: [
        {"role": "user", "content": $prompt}
      ],
      temperature: 0.3,
      max_tokens: 150
    }')

  if ! response=$(curl -sS --max-time 60 https://api.openai.com/v1/chat/completions \
    -H "Authorization: Bearer $OPENAI_API_KEY" \
    -H "Content-Type: application/json" \
    -d "$payload"); then
    echo "Warning: API request failed for $repo_name" >&2
    response=""
  fi
  printf '%s\n' "$response"

  # Surface an API-level error instead of silently recording "No license".
  api_error=$(jq -r '.error.message // empty' <<<"$response" 2>/dev/null)
  if [[ -n "$api_error" ]]; then
    echo "Warning: API error for $repo_name: $api_error" >&2
  fi

  content=$(jq -r '.choices[0].message.content // empty' <<<"$response" 2>/dev/null)

  # First "LICENSE: <name>" occurrence, normalised to upper case because the
  # match itself is case-insensitive.
  license=$(grep -iEo 'LICENSE: *(MIT|MPL-2\.0|BUSL)' <<<"$content" \
    | grep -iEo 'MIT|MPL-2\.0|BUSL' \
    | head -n1 \
    | tr '[:lower:]' '[:upper:]')

  # First "REASON:" line only; the prefix is removed case-insensitively so it
  # agrees with the case-insensitive grep, and a trailing CR is dropped.
  reason=$(grep -iE -m1 '^[[:space:]]*REASON:' <<<"$content" \
    | sed -E 's/^[[:space:]]*[Rr][Ee][Aa][Ss][Oo][Nn]:[[:space:]]*//' \
    | tr -d '\r')

  if [[ -z "$license" ]]; then
    license="No license"
  fi
  if [[ -z "$reason" || "$reason" == "null" ]]; then
    reason="No reason provided"
  fi

  printf '%s,%s,%s,%s\n' \
    "$(csv_field "$repo_name")" \
    "$(csv_field "$repo_url")" \
    "$license" \
    "$(csv_quote "$reason")" >> "$CSV_OUTPUT"

  echo "Processed: $repo_name"
  sleep "$RATE_LIMIT_DELAY"
done 3< <(tail -n +2 "$CSV_INPUT")

echo "All done. Results saved to: $CSV_OUTPUT"