diff --git a/main.go b/main.go
index 50e8d8d..5a1393d 100644
--- a/main.go
+++ b/main.go
@@ -1,7 +1,366 @@
 package main
 
-import "fmt"
+import (
+	"context"
+	"crypto/sha1"
+	"encoding/json"
+	"flag"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"os"
+	"os/signal"
+	"path/filepath"
+	"regexp"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+)
+
+// FileMetadata stores information about a downloaded file
+type FileMetadata struct {
+	URL                string    `json:"url"`
+	ContentDisposition string    `json:"content_disposition,omitempty"`
+	SuggestedFilename  string    `json:"suggested_filename,omitempty"`
+	ActualFilename     string    `json:"actual_filename"`
+	SHA1Checksum       string    `json:"sha1_checksum"`
+	ETag               string    `json:"etag,omitempty"`
+	LastModified       time.Time `json:"last_modified,omitempty"`
+	DownloadedAt       time.Time `json:"downloaded_at"`
+	ContentType        string    `json:"content_type,omitempty"`
+	ContentLength      int64     `json:"content_length,omitempty"`
+}
+
+// Config holds application configuration
+type Config struct {
+	Concurrency  int
+	OutputDir    string
+	MetadataDir  string
+	Timeout      time.Duration
+	RetryCount   int
+	RetryDelay   time.Duration
+	SkipExisting bool
+	Verbose      bool
+}
 
 func main() {
-	fmt.Println("vim-go")
+	// Parse command line flags
+	concurrency := flag.Int("concurrency", 10, "Number of concurrent downloads")
+	outputDir := flag.String("output", "downloads", "Directory to store downloaded files")
+	metadataDir := flag.String("metadata", "metadata", "Directory to store metadata files")
+	timeout := flag.Duration("timeout", 5*time.Minute, "Download timeout")
+	retryCount := flag.Int("retries", 3, "Number of retries for failed downloads")
+	retryDelay := flag.Duration("retry-delay", 5*time.Second, "Delay between retries")
+	skipExisting := flag.Bool("skip-existing", true, "Skip download if file with same checksum exists")
+	verbose := flag.Bool("verbose", false, "Enable verbose logging")
+	flag.Parse()
+
+	// Create configuration
+	config := Config{
+		Concurrency:  *concurrency,
+		OutputDir:    *outputDir,
+		MetadataDir:  *metadataDir,
+		Timeout:      *timeout,
+		RetryCount:   *retryCount,
+		RetryDelay:   *retryDelay,
+		SkipExisting: *skipExisting,
+		Verbose:      *verbose,
+	}
+
+	// Ensure output directories exist
+	for _, dir := range []string{config.OutputDir, config.MetadataDir} {
+		if err := os.MkdirAll(dir, 0755); err != nil {
+			log.Fatalf("Failed to create directory %s: %v", dir, err)
+		}
+	}
+
+	// Read URLs from stdin or file
+	var urls []string
+	if flag.NArg() > 0 {
+		// Read from file
+		content, err := os.ReadFile(flag.Arg(0))
+		if err != nil {
+			log.Fatalf("Failed to read URL file: %v", err)
+		}
+		urls = strings.Split(string(content), "\n")
+	} else {
+		// Read from stdin
+		content, err := io.ReadAll(os.Stdin)
+		if err != nil {
+			log.Fatalf("Failed to read URLs from stdin: %v", err)
+		}
+		urls = strings.Split(string(content), "\n")
+	}
+
+	// Filter empty lines and deduplicate URLs
+	uniqueURLs := make(map[string]struct{})
+	var filteredURLs []string
+	for _, url := range urls {
+		url = strings.TrimSpace(url)
+		if url == "" || strings.HasPrefix(url, "#") {
+			continue
+		}
+		if _, exists := uniqueURLs[url]; !exists {
+			uniqueURLs[url] = struct{}{}
+			filteredURLs = append(filteredURLs, url)
+		}
+	}
+
+	if len(filteredURLs) == 0 {
+		log.Fatal("No valid URLs to download")
+	}
+
+	if config.Verbose {
+		log.Printf("Found %d unique URLs to download", len(filteredURLs))
+	}
+
+	// Setup HTTP client with reasonable defaults
+	httpClient := &http.Client{
+		Timeout: config.Timeout,
+		Transport: &http.Transport{
+			MaxIdleConnsPerHost: config.Concurrency,
+			IdleConnTimeout:     90 * time.Second,
+			DisableCompression:  true, // To ensure we get the exact file
+		},
+	}
+
+	// Set up signal handling for graceful shutdown
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	// Create a channel to listen for OS signals
+	sigChan := make(chan os.Signal, 1)
+	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
+
+	// Start a goroutine to handle the signal
+	go func() {
+		sig := <-sigChan
+		log.Printf("Received signal %v, initiating shutdown...", sig)
+		cancel()
+	}()
+
+	// Process URLs concurrently
+	var wg sync.WaitGroup
+	urlChan := make(chan string, len(filteredURLs))
+
+	// Start worker goroutines
+	for i := 0; i < config.Concurrency; i++ {
+		wg.Add(1)
+		go func(workerID int) {
+			defer wg.Done()
+			for url := range urlChan {
+				select {
+				case <-ctx.Done():
+					return // Context cancelled, stop processing
+				default:
+					if config.Verbose {
+						log.Printf("Worker %d processing URL: %s", workerID, url)
+					}
+					downloadWithRetry(ctx, httpClient, url, config, workerID)
+				}
+			}
+		}(i)
+	}
+
+	// Send URLs to workers
+	for _, url := range filteredURLs {
+		select {
+		case <-ctx.Done():
+			break
+		case urlChan <- url:
+			// URL sent to worker
+		}
+	}
+	close(urlChan)
+
+	// Wait for all downloads to complete
+	wg.Wait()
+	log.Println("All downloads completed or cancelled")
+}
+
+func downloadWithRetry(ctx context.Context, client *http.Client, url string, config Config, workerID int) {
+	var err error
+	for attempt := 0; attempt <= config.RetryCount; attempt++ {
+		if attempt > 0 {
+			log.Printf("Retry %d/%d for URL: %s", attempt, config.RetryCount, url)
+			select {
+			case <-ctx.Done():
+				return
+			case <-time.After(config.RetryDelay):
+				// Continue with retry
+			}
+		}
+
+		err = downloadURL(ctx, client, url, config, workerID)
+		if err == nil || err == context.Canceled {
+			return
+		}
+
+		log.Printf("Download error (attempt %d/%d): %v", attempt+1, config.RetryCount+1, err)
+	}
+	log.Printf("Failed to download after %d attempts: %s - %v", config.RetryCount+1, url, err)
+}
+
+func downloadURL(ctx context.Context, client *http.Client, url string, config Config, workerID int) error {
+	// Create HTTP request with context for cancellation
+	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
+	if err != nil {
+		return fmt.Errorf("failed to create request: %w", err)
+	}
+
+	// Set appropriate headers
+	req.Header.Set("User-Agent", "URL-Downloader/1.0")
+
+	// Perform the request
+	resp, err := client.Do(req)
+	if err != nil {
+		return fmt.Errorf("request failed: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		return fmt.Errorf("HTTP error: %s", resp.Status)
+	}
+
+	// Extract filename from Content-Disposition header or URL
+	suggestedFilename := ""
+	contentDisposition := resp.Header.Get("Content-Disposition")
+	if contentDisposition != "" {
+		re := regexp.MustCompile(`filename=["']?([^"']+)["']?`)
+		matches := re.FindStringSubmatch(contentDisposition)
+		if len(matches) > 1 {
+			suggestedFilename = matches[1]
+		}
+	}
+
+	// If no filename from header, extract from URL
+	if suggestedFilename == "" {
+		urlPath := strings.Split(url, "/")
+		if len(urlPath) > 0 {
+			suggestedFilename = urlPath[len(urlPath)-1]
+			// Remove query parameters if present
+			suggestedFilename = strings.Split(suggestedFilename, "?")[0]
+		}
+	}
+
+	// If still no filename, use a generic one with timestamp
+	if suggestedFilename == "" {
+		suggestedFilename = fmt.Sprintf("download-%d-%d", workerID, time.Now().Unix())
+	}
+
+	// Create temporary file for download
+	tempFile, err := os.CreateTemp("", "download-*")
+	if err != nil {
+		return fmt.Errorf("failed to create temp file: %w", err)
+	}
+	tempFilePath := tempFile.Name()
+	defer func() {
+		tempFile.Close()
+		// Only remove the temp file if we didn't successfully move it
+		if _, err := os.Stat(tempFilePath); err == nil {
+			os.Remove(tempFilePath)
+		}
+	}()
+
+	// Calculate SHA-1 while downloading
+	hash := sha1.New()
+	writer := io.MultiWriter(tempFile, hash)
+
+	// Download the file
+	n, err := io.Copy(writer, resp.Body)
+	if err != nil {
+		return fmt.Errorf("download failed: %w", err)
+	}
+
+	// Get the SHA-1 checksum
+	sha1sum := fmt.Sprintf("%x", hash.Sum(nil))
+
+	// Check if we already have this file
+	if config.SkipExisting {
+		existingPath, exists := findExistingFile(config.MetadataDir, sha1sum)
+		if exists {
+			if config.Verbose {
+				log.Printf("File with SHA-1 %s already exists at %s, skipping", sha1sum, existingPath)
+			}
+			return nil
+		}
+	}
+
+	// Create the target filename based on SHA-1
+	targetFilename := sha1sum
+	if filepath.Ext(suggestedFilename) != "" {
+		// Append original extension if available
+		targetFilename = fmt.Sprintf("%s%s", sha1sum, filepath.Ext(suggestedFilename))
+	}
+	targetPath := filepath.Join(config.OutputDir, targetFilename)
+
+	// Close the temp file before moving it
+	tempFile.Close()
+
+	// Move the temp file to the target location
+	if err := os.Rename(tempFilePath, targetPath); err != nil {
+		return fmt.Errorf("failed to move file: %w", err)
+	}
+
+	// Parse Last-Modified header
+	var lastModified time.Time
+	lastModifiedStr := resp.Header.Get("Last-Modified")
+	if lastModifiedStr != "" {
+		lastModified, _ = time.Parse(time.RFC1123, lastModifiedStr)
+	}
+
+	// Create metadata
+	metadata := FileMetadata{
+		URL:                url,
+		ContentDisposition: contentDisposition,
+		SuggestedFilename:  suggestedFilename,
+		ActualFilename:     targetFilename,
+		SHA1Checksum:       sha1sum,
+		ETag:               resp.Header.Get("ETag"),
+		LastModified:       lastModified,
+		DownloadedAt:       time.Now(),
+		ContentType:        resp.Header.Get("Content-Type"),
+		ContentLength:      n,
+	}
+
+	// Write metadata to file
+	metadataPath := filepath.Join(config.MetadataDir, sha1sum+".json")
+	metadataJSON, err := json.MarshalIndent(metadata, "", " ")
+	if err != nil {
+		return fmt.Errorf("failed to marshal metadata: %w", err)
+	}
+
+	if err := os.WriteFile(metadataPath, metadataJSON, 0644); err != nil {
+		return fmt.Errorf("failed to write metadata: %w", err)
+	}
+
+	if config.Verbose {
+		log.Printf("Successfully downloaded %s (%d bytes) to %s", url, n, targetPath)
+	}
+
+	return nil
+}
+
+// findExistingFile checks if a file with the given SHA-1 checksum already exists
+func findExistingFile(metadataDir, sha1sum string) (string, bool) {
+	metadataPath := filepath.Join(metadataDir, sha1sum+".json")
+	_, err := os.Stat(metadataPath)
+	if err != nil {
+		return "", false
+	}
+
+	// Read the metadata to get the actual file path
+	data, err := os.ReadFile(metadataPath)
+	if err != nil {
+		return "", false
+	}
+
+	var metadata FileMetadata
+	if err := json.Unmarshal(data, &metadata); err != nil {
+		return "", false
+	}
+
+	return metadata.ActualFilename, true
 }