Skip to content

Commit

Permalink
ensuring session is not deleted in case some obfuscation scripts fail…
Browse files Browse the repository at this point in the history
…s at browser session init
  • Loading branch information
pzaino committed Jan 16, 2025
1 parent 33a29cd commit 3f56851
Show file tree
Hide file tree
Showing 2 changed files with 151 additions and 3 deletions.
4 changes: 2 additions & 2 deletions pkg/crawler/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -3255,8 +3255,8 @@ func ConnectVDI(ctx *ProcessContext, sel SeleniumInstance, browseType int) (sele
cmn.DebugMsg(cmn.DbgLvlDebug, "Browser Configuration: %v\n", result)
}

err = addLoadListener(&wd)
if err != nil {
err2 := addLoadListener(&wd)
if err2 != nil {
cmn.DebugMsg(cmn.DbgLvlError, "adding Load Listener to the VDI session: %v", err)
}

Expand Down
150 changes: 149 additions & 1 deletion pkg/database/source.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,14 @@
// Package database is responsible for handling the database setup, configuration and abstraction.
package database

import "fmt"
import (
"encoding/json"
"fmt"
"net/url"
"strings"

cfg "github.com/pzaino/thecrowler/pkg/config"
)

// GetSourceByID retrieves a source from the database by its ID.
func GetSourceByID(db *Handler, sourceID uint64) (*Source, error) {
Expand All @@ -29,3 +36,144 @@ func GetSourceByID(db *Handler, sourceID uint64) (*Source, error) {

return source, nil
}

// CreateSource validates config, serializes it to JSON and inserts a new row
// into the Sources table.
//
// The url, name, category_id, usr_id, restricted and flags columns are taken
// from source; the details column is populated from the marshaled config
// (source.Config is not read by this function).
//
// It returns the source_id generated by the database. On failure the returned
// ID is 0 and the error wraps the underlying cause, so callers can inspect it
// with errors.Is / errors.As.
func CreateSource(db *Handler, source *Source, config cfg.SourceConfig) (uint64, error) {
	// Reject malformed configurations before touching the database.
	if err := validateSourceConfig(config); err != nil {
		return 0, fmt.Errorf("invalid source configuration: %w", err)
	}

	// Marshal the SourceConfig so it can be stored in the details column.
	details, err := json.Marshal(config)
	if err != nil {
		return 0, fmt.Errorf("failed to marshal source configuration: %w", err)
	}

	query := `
		INSERT INTO Sources (url, name, category_id, usr_id, restricted, flags, details)
		VALUES ($1, $2, $3, $4, $5, $6, $7)
		RETURNING source_id
	`

	// RETURNING hands back the generated key in the same round trip.
	var sourceID uint64
	err = (*db).QueryRow(query, source.URL, source.Name, source.CategoryID, source.UsrID, source.Restricted, source.Flags, details).Scan(&sourceID)
	if err != nil {
		return 0, fmt.Errorf("failed to create source: %w", err)
	}

	return sourceID, nil
}

// validateSourceConfig performs basic sanity checks on a SourceConfig.
//
// It returns an error when:
//   - Version, FormatVersion or SourceName is empty;
//   - CrawlingConfig.Site is rejected by validateURL (the cause is wrapped);
//   - any ExecutionPlan item has an empty Label or no Conditions.URLPatterns.
//
// It returns nil when all checks pass. An empty ExecutionPlan is accepted.
func validateSourceConfig(config cfg.SourceConfig) error {
	if config.Version == "" || config.FormatVersion == "" || config.SourceName == "" {
		return fmt.Errorf("version, format_version, and source_name are required fields")
	}

	if err := validateURL(config.CrawlingConfig.Site); err != nil {
		return fmt.Errorf("invalid URL in crawling_config: %w", err)
	}

	// Every execution plan entry must be identifiable and target something.
	for _, item := range config.ExecutionPlan {
		if item.Label == "" || len(item.Conditions.URLPatterns) == 0 {
			return fmt.Errorf("execution plan items must have a label and at least one URL pattern")
		}
	}

	// Add more validation as needed based on your specific requirements.
	return nil
}

// validateURL reports whether site is an absolute URL, i.e. it parses with
// url.Parse and has both a scheme and a host.
//
// It returns nil for a valid URL. When parsing fails, the returned error wraps
// the parser's error so the reason is visible and can be inspected with
// errors.As; when the URL parses but lacks a scheme or host, a plain
// "invalid URL" error is returned.
func validateURL(site string) error {
	parsedURL, err := url.Parse(site)
	if err != nil {
		// Keep the parser's diagnosis instead of discarding it.
		return fmt.Errorf("invalid URL: %s: %w", site, err)
	}
	// Relative references such as "example.com" or "/path" parse fine but
	// have no scheme and/or host.
	if parsedURL.Scheme == "" || parsedURL.Host == "" {
		return fmt.Errorf("invalid URL: %s", site)
	}
	return nil
}

// UpdateSource overwrites the row in the Sources table whose source_id equals
// source.ID.
//
// The url, name, category_id, usr_id, restricted, flags and details columns
// are set from the corresponding fields of source (details comes from
// source.Config), and last_updated_at is set to NOW().
//
// It returns nil when the statement executes without error; the result of the
// statement is not inspected, so an ID that matches no row is not reported as
// an error. On failure the returned error wraps the underlying cause.
func UpdateSource(db *Handler, source *Source) error {
	query := `
		UPDATE Sources
		SET url = $1, name = $2, category_id = $3, usr_id = $4, restricted = $5, flags = $6, details = $7, last_updated_at = NOW()
		WHERE source_id = $8
	`
	_, err := (*db).Exec(query, source.URL, source.Name, source.CategoryID, source.UsrID, source.Restricted, source.Flags, source.Config, source.ID)
	if err != nil {
		return fmt.Errorf("failed to update source with ID %d: %w", source.ID, err)
	}
	return nil
}

// DeleteSource deletes the row in the Sources table whose source_id equals
// sourceID.
//
// It returns nil when the statement executes without error; the result of the
// statement is not inspected, so an ID that matches no row is not reported as
// an error. On failure the returned error wraps the underlying cause.
func DeleteSource(db *Handler, sourceID uint64) error {
	query := `DELETE FROM Sources WHERE source_id = $1`
	if _, err := (*db).Exec(query, sourceID); err != nil {
		return fmt.Errorf("failed to delete source with ID %d: %w", sourceID, err)
	}
	return nil
}

// ListSources returns the sources stored in the Sources table, optionally
// filtered by category and/or user.
//
// A nil categoryID or userID disables the corresponding filter; when both are
// non-nil the filters are combined with AND. The returned slice is never nil
// on success (it is empty when no row matches). On failure the result is nil
// and the error wraps the underlying cause.
func ListSources(db *Handler, categoryID *uint64, userID *uint64) ([]Source, error) {
	sources := []Source{}
	query := `SELECT source_id, url, name, category_id, usr_id, restricted, flags, details FROM Sources`
	var args []interface{}
	var conditions []string

	// addFilter appends a bound argument and a matching "column = $N"
	// condition. N is derived from the number of arguments collected so far,
	// so placeholders stay contiguous whichever filters are active (a fixed
	// "$2" would be invalid when only the user filter is supplied).
	addFilter := func(column string, value uint64) {
		args = append(args, value)
		conditions = append(conditions, fmt.Sprintf("%s = $%d", column, len(args)))
	}

	if categoryID != nil {
		addFilter("category_id", *categoryID)
	}
	if userID != nil {
		addFilter("usr_id", *userID)
	}
	if len(conditions) > 0 {
		query += " WHERE " + strings.Join(conditions, " AND ")
	}

	rows, err := (*db).ExecuteQuery(query, args...)
	if err != nil {
		return nil, fmt.Errorf("failed to list sources: %w", err)
	}
	defer rows.Close() //nolint:errcheck // We can't check return value on defer

	for rows.Next() {
		var source Source
		err := rows.Scan(&source.ID, &source.URL, &source.Name, &source.CategoryID, &source.UsrID, &source.Restricted, &source.Flags, &source.Config)
		if err != nil {
			return nil, fmt.Errorf("failed to scan source: %w", err)
		}
		sources = append(sources, source)
	}
	// Next() returning false may mean an iteration error rather than the end
	// of the result set; do not hand back a silently truncated list.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("failed to iterate sources: %w", err)
	}
	return sources, nil
}

// GetSourcesByStatus returns the rows of the Sources table whose status column
// equals status.
//
// The returned slice is never nil on success (it is empty when no row
// matches). On failure the result is nil and the error wraps the underlying
// cause.
func GetSourcesByStatus(db *Handler, status string) ([]Source, error) {
	sources := []Source{}
	query := `
		SELECT source_id, url, name, category_id, usr_id, restricted, flags, details
		FROM Sources
		WHERE status = $1
	`
	rows, err := (*db).ExecuteQuery(query, status)
	if err != nil {
		return nil, fmt.Errorf("failed to retrieve sources by status: %w", err)
	}
	defer rows.Close() //nolint:errcheck // We can't check return value on defer

	for rows.Next() {
		var source Source
		err := rows.Scan(&source.ID, &source.URL, &source.Name, &source.CategoryID, &source.UsrID, &source.Restricted, &source.Flags, &source.Config)
		if err != nil {
			return nil, fmt.Errorf("failed to scan source: %w", err)
		}
		sources = append(sources, source)
	}
	// Next() returning false may mean an iteration error rather than the end
	// of the result set; do not hand back a silently truncated list.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("failed to iterate sources by status: %w", err)
	}
	return sources, nil
}

0 comments on commit 3f56851

Please sign in to comment.