# Patch overview (free text placed before the first "diff --git" header).
#
# main.go
#   * Imports: adds "bytes", drops "slices".
#   * Renames the helper assertNoErr to assert (it panics on a non-nil error).
#   * main: no longer issues one GET to <elasticSearch>/aips/_search? and
#     substring-matches every hit against the CSV names. It now sends one
#     term query on the "name.raw" field per CSV name through the new do helper.
#       - exactly 1 hit : prints "AIP Found: <hit name>" and appends the CSV
#                         name to aip_names.txt
#       - more than 1   : prints a diagnostic and exits with status 1
#       - 0 hits        : nothing is written for that name
#   * do(path, payload, responsePayload): JSON-encodes payload when it is
#     non-nil, sends it as the body of a GET request with Content-Type
#     application/json, and reads the full response body. On status >= 400 it
#     prints the body and returns an error; otherwise it unmarshals the body
#     into responsePayload.
#
# re-index.sh (new file, mode 100755)
#   * Runs the dashboard management command
#     rebuild_aip_index_from_storage_service --delete --uuid <uuid>
#     as user archivematica, after sourcing the dashboard environment file from
#     /etc/default or /etc/sysconfig.
#   * Expects exactly one argument: a file that is read line by line, with
#     empty lines skipped and one rebuild run per remaining line.
#
# NOTE(review), visible in the added lines below:
#   * do never closes res.Body.
#   * The aip_names.txt handle (result) is not closed in the shown part of main.
#   * The message "more than one match, this is not expeted" contains a typo.
#   * re-index.sh first runs a rebuild for the hard-coded UUID
#     ec5c14b5-a629-476a-be48-21d36dda7cc0, before "set -euo pipefail" and
#     before the argument check. Presumably a leftover; confirm it is intended.
#   * The usage message prints "Usage: $0 " without naming the expected
#     argument.
#   * $uuid is expanded unquoted inside the double-quoted "bash -c" string, so
#     the contents of the UUID file are interpreted by the inner shell.
#   * The payload below appears with its line breaks collapsed. Presumably the
#     original newlines must be restored before patch or git apply can use it;
#     TODO confirm against the original patch file.
diff --git a/main.go b/main.go index d35bebf..e3fa564 100644 --- a/main.go +++ b/main.go @@ -1,6 +1,7 @@ package main import ( + "bytes" "database/sql" "encoding/csv" "encoding/json" @@ -9,7 +10,6 @@ import ( "io" "net/http" "os" - "slices" "strings" _ "github.com/go-sql-driver/mysql" @@ -24,49 +24,83 @@ var ( func main() { flag.Parse() file, err := os.Open(*csvPath) - assertNoErr(err) + assert(err) records, err := csv.NewReader(file).ReadAll() - assertNoErr(err) + assert(err) r := []string{} for _, record := range records { r = append(r, strings.TrimSpace(record[0])) } - - // Elastic - elastic := *elasticSearch + "/aips/_search?" - res, err := http.Get(elastic) - assertNoErr(err) - data, err := io.ReadAll(res.Body) - assertNoErr(err) - - var er ElasticAipResponse - err = json.Unmarshal(data, &er) - assertNoErr(err) - - fmt.Printf("Total: %d\n", er.Hits.Total) - result, err := os.OpenFile("aip_names.txt", os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644) - for _, index := range er.Hits.Hits { - if slices.ContainsFunc(r, func(record string) bool { - return strings.Contains(index.Source.Name, record) - }) { - fmt.Println("AIP Found: " + index.Source.Name) - _, err = result.WriteString(index.Source.Name + "\n") - assertNoErr(err) + assert(err) + + // Query mapping to know if it's raw or keyword: /aips/_mapping + q := struct { + Query struct { + Term struct { + Name string `json:"name.raw"` + } `json:"term"` + } `json:"query"` + }{} + for _, name := range r { + q.Query.Term.Name = name + var res ElasticAipResponse + err = do(*elasticSearch+"/aips/_search", q, &res) + assert(err) + if res.Hits.Total == 1 { + fmt.Println("AIP Found: " + res.Hits.Hits[0].Source.Name) + _, err = result.WriteString(name + "\n") + assert(err) + } else if res.Hits.Total > 1 { + fmt.Println("more than one match, this is not expeted") + fmt.Println("Name: ", name) + os.Exit(1) } } } +func do(path string, payload, responsePayload any) error { + var body io.Reader + if payload != nil { + jsonBody, err := 
json.Marshal(payload) + if err != nil { + return err + } + // fmt.Println(string(jsonBody)) + body = bytes.NewReader(jsonBody) + } + req, err := http.NewRequest(http.MethodGet, path, body) + if err != nil { + return err + } + + req.Header.Set("Content-Type", "application/json") + res, err := http.DefaultClient.Do(req) + if err != nil { + return err + } + + resBody, err := io.ReadAll(res.Body) + if err != nil { + return err + } + if res.StatusCode >= 400 { + fmt.Println(string(resBody)) + return fmt.Errorf("request failed: %s", http.StatusText(res.StatusCode)) + } + return json.Unmarshal(resBody, responsePayload) +} + func openDB(connStr string) *sql.DB { db, err := sql.Open("mysql", connStr) - assertNoErr(err) + assert(err) err = db.Ping() - assertNoErr(err) + assert(err) return db } -func assertNoErr(err error) { +func assert(err error) { if err != nil { panic(err) } diff --git a/re-index.sh b/re-index.sh new file mode 100755 index 0000000..1f06784 --- /dev/null +++ b/re-index.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +# Usage: ./script.sh uuids.txt +# +sudo -u archivematica bash -c " \ + set -a -e -x + source /etc/default/archivematica-dashboard || \ + source /etc/sysconfig/archivematica-dashboard \ + || (echo 'Environment file not found'; exit 1) + /usr/share/archivematica/virtualenvs/archivematica/bin/python -m archivematica.dashboard.manage \ + rebuild_aip_index_from_storage_service --delete --uuid ec5c14b5-a629-476a-be48-21d36dda7cc0 +"; + +set -euo pipefail + +if [ $# -ne 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +UUID_FILE="$1" + +if [ ! 
-f "$UUID_FILE" ]; then + echo "File not found: $UUID_FILE" + exit 1 +fi + +while IFS= read -r uuid; do + [[ -z "$uuid" ]] && continue + + echo "Processing UUID: $uuid" + + sudo -u archivematica bash -c " + set -a -e -x + source /etc/default/archivematica-dashboard 2>/dev/null || + source /etc/sysconfig/archivematica-dashboard 2>/dev/null || + { echo 'Environment file not found'; exit 1; } + /usr/share/archivematica/virtualenvs/archivematica/bin/python \ + -m archivematica.dashboard.manage \ + rebuild_aip_index_from_storage_service --delete --uuid $uuid + " + +done < "$UUID_FILE"